In [1]:
import xarray as xr, matplotlib.pyplot as plt, cartopy, pandas as pd, numpy as np, regionmask, re, glob, os, geopandas as gpd
# turn on xarray's global keep_attrs option so attributes are carried through operations
xr.set_options(keep_attrs = True)

from dateutil.relativedelta import relativedelta
from datetime import datetime, timedelta

from IPython.display import clear_output

# bounding box of the study domain, in the order passed to `cdo -sellonlatbox`
# (min lon, max lon, min lat, max lat)
xn, xx, yn, yx = -10, 5, 30, 45

# outlines of the northern and southern study regions
sf_n, sf_s = (gpd.read_file(sf_path) for sf_path in ("sf_northern", "sf_southern"))

ERROR 1: PROJ: proj_create_from_database: Open of /rds/general/user/cb2714/home/miniforge3/envs/wwa/share/proj failed


In [2]:
# quick method for area-weighted averaging (will add to WWA main but need to generalise dims first)
def awmean(da):
    """Return the mean of `da` over "lat" and "lon", weighting each cell by cos(latitude)."""
    lat_weights = np.cos(np.deg2rad(da.lat))
    return da.weighted(lat_weights).mean(["lat", "lon"])

def wrap_lon(ds):
    """
    Wrap longitude from (0,360) to (-180,180) and sort the horizontal coordinates.

    The coordinate names are detected as either "longitude"/"latitude" or "lon"/"lat".
    If neither longitude name is among `ds.coords` the input is returned unchanged.

    The wrapped longitudes are attached with `assign_coords`, which returns a new
    object, so the dataset passed in by the caller is not modified.
    """
    if "longitude" in ds.coords:
        lon, lat = "longitude", "latitude"
    elif "lon" in ds.coords:
        lon, lat = "lon", "lat"
    else:
        # can only wrap longitude
        return ds

    if ds[lon].max() > 180:
        wrapped = ((ds[lon].values + 180) % 360) - 180
        # keep the original dims and attrs of the longitude coordinate
        ds = ds.assign_coords({ lon : (ds[lon].dims, wrapped, ds[lon].attrs) })

    # sorting via reindex is only possible when the coordinate is also a dimension
    if lon in ds.dims:
        ds = ds.reindex({ lon : np.sort(ds[lon]) })
        if lat in ds.dims:
            ds = ds.reindex({ lat : np.sort(ds[lat]) })
    return ds


def decode_times(ts):
    """
    Manually decode a numeric time axis of the form "<increment> since <start date>".

    `ts.time.units` supplies the increment and start date, and `ts.time.values` the
    offsets. Supported increments are "years", "months", "days" and "hours"; for any
    other increment a "TBD" message is printed and None is returned.

    The start date is parsed as written, so units with or without a time of day
    (e.g. "days since 1950-01-01" or "days since 1950-01-01 00:00:00") are both accepted.

    Returns `ts` with its time coordinate replaced by numpy datetime64 values.
    """
    units = ts.time.units
    inc = re.sub(" .+", "", units)
    # a date-only string parses to midnight, so no time-of-day suffix needs to be appended
    startdate = pd.Timestamp(re.sub(".+since ", "", units)).to_pydatetime()

    if inc not in ("years", "months", "days", "hours"):
        print("TBD: " +inc)
        return

    # relativedelta takes the increment name as its keyword (years=, months=, days=, hours=)
    new_times = [np.datetime64(startdate + relativedelta(**{inc : i})) for i in ts.time.values]

    return ts.assign_coords(time = new_times)
    

def main_polygon(gdf):
    """Dissolve `gdf`, split the result into single parts and keep the part(s) with the largest area."""
    parts = gdf.dissolve().explode(index_parts = False)
    areas = parts.geometry.area
    return parts.loc[areas == areas.max()]

# Shapefiles

In [None]:
# clip the Natural Earth 1:50m land polygons to each study box and save as a shapefile
for sf_path, bbox in [("sf_northern", (-9.5,40,-7,44)),     # northern region
                      ("sf_southern", (-10,34,-3.5,39))]:   # southern region
    land = regionmask.defined_regions.natural_earth_v5_1_2.land_50.to_geodataframe()
    land.clip(bbox).set_crs("epsg:4326").to_file(sf_path, mode = "w")

# Gridded obs data

## CHIRPS

In [29]:
# scratch directory that will hold the CHIRPS subsets
fpath = "/rds/general/user/cb2714/home/99_ephemera/chirps/"
! mkdir -p $fpath

# list the source CHIRPS files so we can loop over them
# (presumably one per year - the loop below takes a year label from each file name; TODO confirm)
fl = sorted(glob.glob("/rds/general/user/cb2714/home/00_WWA_project_folder/live/data/chirps_05/*"))

In [32]:
for fp in fl:

    # specify name of new file
    # (the label is the third "."-separated field of the full path - assumes no other "." in the path)
    y = fp.split(".")[2]
    print(y)
    new_fnm = fpath+"chirps_"+y+"_"+"_".join(str(x) for x in [xn,xx,yn,yx])+".nc"

    # check if the file exists already - don't recreate it if it does
    if os.path.exists(new_fnm): continue

    # use CDO to cut out the box (no merging is done in this step)
    ! module load CDO/2.4.4-gompi-2024a; cdo -sellonlatbox,$xn,$xx,$yn,$yx $fp $new_fnm

# discard the per-file progress output once everything has been processed
clear_output(wait = False)
print("Done.")

Done.


### Compile

In [None]:
# new_fnm = "data/pr_chirps_"+"_".join(str(x) for x in [xn,xx,yn,yx])+".nc"
# ! module load CDO/2.4.4-gompi-2024a; cdo -sellonlatbox,$xn,$xx,$yn,$yx -mergetime $fpath/*.nc $new_fnm

### Extract time series

In [22]:
# daily CHIRPS precipitation over the study box
pr = xr.open_dataset("data/pr_chirps_-10_5_30_45.nc").precip.rename("pr")

# regional-mean daily series for BOTH study regions: the southern file is needed by the
# cell that merges all data/pr-s_*.nc series, so the loop must not stop after the northern region
for rnm, sf in [("n", sf_n), ("s", sf_s)]:
    # cells belonging to the region are numbered 0 in the mask (the shapefile's first feature)
    rm  = regionmask.mask_geopandas(sf, pr)
    ts = pr.where(rm == 0, drop = True).mean(["latitude", "longitude"])
    ts.to_netcdf("data/pr-"+rnm+"_chirps.nc")

## CPC

In [None]:
# directory holding the global CPC file that the next cell subsets with CDO
fpath = "/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/mediterranean-storms/obs/cpc"
# subsetting on climexp failed so will use CDO instead
# ! wget https://climexp.knmi.nl/dyn_links/prcp_GLB_dailyNC.nc -O $fpath/cpc_global.nc

In [109]:
# cut the study box out of the global CPC file; the output name encodes the box bounds
new_fnm = "data/pr_cpc_"+"_".join(str(x) for x in [xn,xx,yn,yx])+".nc"
! module load CDO/2.4.4-gompi-2024a; cdo -sellonlatbox,$xn,$xx,$yn,$yx $fpath/cpc_global.nc $new_fnm    

[32mcdo    sellonlatbox: [0mProcessed 4461350400 values from 1 variable over 17212 timesteps [37.58s 70MB]


### Extract time series

In [18]:
# daily CPC precipitation over the study box, with each timestamp floored to the start of its day
pr = xr.open_dataset("data/pr_cpc_-10_5_30_45.nc").precip.rename("pr")
pr = pr.assign_coords(time = pr.time.dt.floor("D"))

# regional-mean daily series, one file per study region
for rnm, sf in (("n", sf_n), ("s", sf_s)):
    rm = regionmask.mask_geopandas(sf, pr)
    inside = pr.where(rm == 0, drop = True)
    ts = inside.mean(["lat", "lon"])
    ts.to_netcdf(f"data/pr-{rnm}_cpc.nc")

## E-Obs

In [None]:
# directory for the E-Obs downloads and their subsets
outpath = "/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/mediterranean-storms/obs/eobs"

In [None]:
# E-Obs ensemble-mean rainfall (rr) files. The two long-record downloads are commented out;
# only the 2025 and 2026 files are fetched here.
# ! wget https://knmi-ecad-assets-prd.s3.amazonaws.com/ensembles/data/Grid_0.25deg_reg_ensemble/rr_ens_mean_0.25deg_reg_pre1950.nc -O $outpath/rr_ens_mean_0.25deg_reg_pre1950.nc
# ! wget https://knmi-ecad-assets-prd.s3.amazonaws.com/ensembles/data/Grid_0.25deg_reg_ensemble/rr_ens_mean_0.25deg_reg_v32.0e.nc -O $outpath/rr_ens_mean_0.25deg_reg_v32.0e.nc
! wget https://knmi-ecad-assets-prd.s3.amazonaws.com/ensembles/data/months/ens/rr_0.25deg_day_2025_grid_ensmean.nc -O $outpath/rr_0.25deg_day_2025_grid_ensmean.nc
! wget https://knmi-ecad-assets-prd.s3.amazonaws.com/ensembles/data/months/ens/rr_0.25deg_day_2026_grid_ensmean.nc -O $outpath/rr_0.25deg_day_2026_grid_ensmean.nc

# discard the wget progress output
clear_output(wait = False)
print("Done.")

### Subset & compile

In [None]:
# subset all chunks of data
fl = sorted(glob.glob(outpath+"/rr_*.nc"))
for fp in fl:
    fnm = fp.split("/")[-1]
    # period label used in the output file name: fixed for the two long-record files,
    # otherwise the fourth "_"-separated field of the file name
    if fnm == 'rr_ens_mean_0.25deg_reg_v32.0e.nc':
        yy = "1950-2025"
    elif fnm == 'rr_ens_mean_0.25deg_reg_pre1950.nc':
        yy = "1920-1949"
    else: 
        yy = fnm.split("_")[3]
        
    # skip files that have already been subset
    new_fnm = outpath+"/pr_eobs_"+yy+"_"+"_".join(str(x) for x in [xn,xx,yn,yx])+".nc"
    if os.path.exists(new_fnm): continue

    # cut out the study box
    ! module load CDO/2.4.4-gompi-2024a; cdo -sellonlatbox,$xn,$xx,$yn,$yx $fp $new_fnm    

In [None]:
# subset just 1950-2024 for main file to avoid duplicates (ends in June 2025 for some reason)
# the 1950-2025 subset is deleted afterwards so that only the trimmed file matches the merge pattern
! module load CDO/2.4.4-gompi-2024a; cdo -selyear,1950/2024/1 $outpath/pr_eobs_1950-2025_-10_5_30_45.nc $outpath/pr_eobs_1950-2024_-10_5_30_45.nc; rm $outpath/pr_eobs_1950-2025_-10_5_30_45.nc

In [None]:
# merge all files into one
# (every pr_eobs_*_-10_5_30_45.nc subset in $outpath is concatenated along time)
! module load CDO/2.4.4-gompi-2024a; cdo mergetime $outpath/pr_eobs_*_-10_5_30_45.nc data/pr_eobs_-10_5_30_45.nc 

### Extract time series

In [3]:
# daily E-Obs rainfall over the study box, without the time steps that are missing everywhere
pr = xr.open_dataset("data/pr_eobs_-10_5_30_45.nc").rr.rename("pr")
pr = pr.dropna(dim = "time", how = "all")

# regional-mean daily series, saved as both netcdf and csv
for rnm, sf in (("n", sf_n), ("s", sf_s)):
    rm = regionmask.mask_geopandas(sf, pr)
    ts = pr.where(rm == 0, drop = True).mean(["latitude", "longitude"])

    out_stem = f"data/pr-{rnm}_eobs"
    ts.to_netcdf(out_stem+".nc")
    ts.to_pandas().to_csv(out_stem+".csv")

## ERA5

In [None]:
! wget https://climexp.knmi.nl/dyn_links/era5_tp_daily_eu_extended_-10-5E_30-45N_-999-lastyear_su.nc -O data/tp_era5_-10_5_30_45.nc

### Extract time series

In [4]:
# daily ERA5 total precipitation over the study box
pr = xr.open_dataset("data/pr_era5_-10_5_30_45.nc").tp.rename("pr")

# regional-mean daily series, saved as both netcdf and csv
for rnm, sf in (("n", sf_n), ("s", sf_s)):
    rm = regionmask.mask_geopandas(sf, pr)
    ts = pr.where(rm == 0, drop = True).mean(["lat", "lon"])

    out_stem = f"data/pr-{rnm}_era5"
    ts.to_netcdf(out_stem+".nc")
    ts.to_pandas().to_csv(out_stem+".csv")

## MSWEP

### Subset & compile

In [90]:
# extract subset & compile per year
fpath = "/rds/general/user/cb2714/home/99_ephemera/mswep/"
! mkdir -p $fpath

# list the directories in the MSWEP folder so we can loop over them (one per year)
fl = sorted(glob.glob("/rds/general/user/cb2714/home/00_WWA_project_folder/live/data/mswep/*"))

for fp in fl:

    # specify name of new file (labelled with the directory name)
    y = fp.split("/")[-1]
    new_fnm = fpath+"mswep_"+y+"_"+"_".join(str(x) for x in [xn,xx,yn,yx])+".nc"

    # check if the file exists already - don't recreate it if it does
    if os.path.exists(new_fnm): continue

    # use CDO to stick all days together & cut out the box
    ! module load CDO/2.4.4-gompi-2024a; cdo -sellonlatbox,$xn,$xx,$yn,$yx -mergetime $fp/*.nc $new_fnm

[32mcdo(1) mergetime: [0mProcess started
[32mcdo    sellonlatbox: [0mProcessed 298080000 values from 1 variable over 46 timesteps [5.97s 122MB]


In [92]:
# compile
# merge the per-directory subsets into a single file; -O overwrites any existing output
# (the box selection is applied again here, although the inputs were already subset above)
new_fnm = "data/pr_mswep_"+"_".join(str(x) for x in [xn,xx,yn,yx])+".nc"
! module load CDO/2.4.4-gompi-2024a; cdo -O -sellonlatbox,$xn,$xx,$yn,$yx -mergetime $fpath/*.nc $new_fnm

[32mcdo(1) mergetime: [0mProcess started
[32mcdo    sellonlatbox: [0mProcessed 387247500 values from 1 variable over 17211 timesteps [8.76s 117MB]


### Extract time series

In [21]:
# daily MSWEP precipitation over the study box
pr = xr.open_dataset("data/pr_mswep_-10_5_30_45.nc").precipitation.rename("pr")

# regional-mean daily series, one file per study region
for rnm, sf in (("n", sf_n), ("s", sf_s)):
    rm = regionmask.mask_geopandas(sf, pr)
    inside = pr.where(rm == 0, drop = True)
    ts = inside.mean(["lat", "lon"])
    ts.to_netcdf(f"data/pr-{rnm}_mswep.nc")

## Dataframe to more easily read into R

In [None]:
# gather every southern-region series into one table, one column per dataset
# (the column name is the part of the file name between "data/pr-s_" and ".nc")
southern_files = sorted(glob.glob("data/pr-s_*.nc"))
da = xr.merge([xr.open_dataset(fnm).rename({"pr" : fnm[10:-3]}) for fnm in southern_files])
da.to_pandas().to_csv("data/pr-s_all-obs.csv")

## Time series for climate explorer

### rx1day

In [32]:
for rnm in ["n", "s"]:

    rnm_long = {"n" : "northern", "s" : "southern"}[rnm]

    for fnm in glob.glob("data/pr-"+rnm+"_*.nc"):
        ds_nm = fnm[10:-3]
        
        ts = xr.open_dataset(fnm).pr
        rx1day = ts.sel(time = ts.time.dt.month.isin([10,11,12,1,2,3])).resample(time = "YE-JUL").max()
        df = rx1day.assign_coords(time = rx1day.time.dt.year).rename(time = "year").to_dataframe()

        # csv for climate explorer
        csv_fnm = "med-storms_rx1day-ondjfm_"+rnm+"_"+ds_nm
        csv_fullpath = "ts-obs/"+csv_fnm+".dat"
        if len(csv_fnm) > 61: print("! Filename too long: ", csv_fnm)

        # create header lines for upload to Climate Explorer 
        str1 = "# contact :: "+ds_nm.upper()+" Rx1day-ONDJFM precipitation ("+rnm_long+" region) - Mediterranean storms, c.barnes22@imperial.ac.uk"
        str2 = "# rx1day [mm/day] Annual maximum of October-March daily precipitation (labelled at end of season) averaged over "+rnm_long+" region at "+csv_fnm+".dat"
        head = "# year rx1day"

        # make .dat file
        ! echo "$str1 " > $csv_fullpath
        ! echo "$str2" >> $csv_fullpath
        ! echo "$head" >> $csv_fullpath
        df.to_csv(csv_fullpath, sep = " ", mode = "a", header = False)

In [3]:
for rnm in ["n", "s"]:

    rnm_long = {"n" : "northern", "s" : "southern"}[rnm]

    for fnm in glob.glob("data/pr-"+rnm+"_*.nc"):
        ds_nm = fnm[10:-3]
        
        ts = xr.open_dataset(fnm).pr
        rx1day = ts.sel(time = ts.time.dt.month.isin([12,1,2])).resample(time = "YE-JUL").max()
        df = rx1day.assign_coords(time = rx1day.time.dt.year).rename(time = "year").to_dataframe()

        # csv for climate explorer
        csv_fnm = "med-storms_rx1day-djf_"+rnm+"_"+ds_nm
        csv_fullpath = "ts-obs/"+csv_fnm+".dat"
        if len(csv_fnm) > 61: print("! Filename too long: ", csv_fnm)

        # create header lines for upload to Climate Explorer 
        str1 = "# contact :: "+ds_nm.upper()+" Rx1day-DJF precipitation ("+rnm_long+" region) - Mediterranean storms, c.barnes22@imperial.ac.uk"
        str2 = "# rx1day [mm/day] Annual maximum of December-February daily precipitation (labelled at end of season) averaged over "+rnm_long+" region at "+csv_fnm+".dat"
        head = "# year rx1day"

        # make .dat file
        ! echo "$str1 " > $csv_fullpath
        ! echo "$str2" >> $csv_fullpath
        ! echo "$head" >> $csv_fullpath
        df.to_csv(csv_fullpath, sep = " ", mode = "a", header = False)

### rx90day

In [34]:
for rnm in ["n", "s"]:

    rnm_long = {"n" : "northern", "s" : "southern"}[rnm]

    for fnm in glob.glob("data/pr-"+rnm+"_*.nc"):
        ds_nm = fnm[10:-3]
        
        ts = xr.open_dataset(fnm).pr.rolling(time = 90).mean().resample(time = "YE-JUL").mean()
        df = ts.assign_coords(time = ts.time.dt.year).rename(time = "year").to_dataframe()

        # csv for climate explorer
        csv_fnm = "med-storms_rx90day_"+rnm+"_"+ds_nm
        csv_fullpath = "ts-obs/"+csv_fnm+".dat"
        if len(csv_fnm) > 61: print("! Filename too long: ", csv_fnm)

        # create header lines for upload to Climate Explorer 
        str1 = "# contact :: "+ds_nm.upper()+" Rx90dat ("+rnm_long+" region) - Mediterranean storms, c.barnes22@imperial.ac.uk"
        str2 = "# rx90day [mm/day] July-June annual maximum of rolling 90-day mean daily precipitation (labelled at end of season) averaged over "+rnm_long+" region at "+csv_fnm+".dat"
        head = "# year rx90day"

        # make .dat file
        ! echo "$str1 " > $csv_fullpath
        ! echo "$str2" >> $csv_fullpath
        ! echo "$head" >> $csv_fullpath
        df.to_csv(csv_fullpath, sep = " ", mode = "a", header = False)

## Spatial patterns

In [29]:
# 1990-2020 mean precipitation field for each calendar month, one file per gridded dataset
fl = glob.glob("data/pr_*_-10*.nc")
for fnm in fl:

    # the dataset label is the second "_"-separated field of the file name
    ds_label = fnm.split("_")[1]
    new_fnm = f"/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/mediterranean-storms/obs/pr-mmean_{ds_label}.nc"

    # only compute climatologies that don't exist yet
    if not os.path.exists(new_fnm):
        pr = xr.open_dataset(fnm)
        # take the first data variable, whatever it is called in this dataset
        first_var = list(pr.data_vars)[0]
        pr = pr[first_var].rename("pr")
        pr_s = pr.sel(time = slice("1990","2020")).groupby("time.month").mean()
        pr_s.to_netcdf(new_fnm)

# Station data

In [121]:
# download the ECA non-blended rr station archive into the working directory (about 1.3G according to the wget log)
! wget https://knmi-ecad-assets-prd.s3.amazonaws.com/download/ECA_nonblend_rr.zip

--2026-02-16 14:09:46--  https://knmi-ecad-assets-prd.s3.amazonaws.com/download/ECA_nonblend_rr.zip
Resolving knmi-ecad-assets-prd.s3.amazonaws.com (knmi-ecad-assets-prd.s3.amazonaws.com)... 2a0c:5bc0:4c:ff::305:477c, 2a0c:5bc0:4c:ff::305:4b12, 2a0c:5bc0:4c:ff::305:4767, ...
Connecting to knmi-ecad-assets-prd.s3.amazonaws.com (knmi-ecad-assets-prd.s3.amazonaws.com)|2a0c:5bc0:4c:ff::305:477c|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1386713464 (1.3G) [application/zip]
Saving to: ‘ECA_nonblend_rr.zip’


2026-02-16 14:10:09 (58.6 MB/s) - ‘ECA_nonblend_rr.zip’ saved [1386713464/1386713464]



# Covariate time series

## GMST

In [None]:
# GMST covariate series from the KNMI Climate Explorer, saved to ts/gmst.dat
! wget https://climexp.knmi.nl/data/igiss_al_gl_a_4yrlo.dat -O ts/gmst.dat

ERROR 1: PROJ: proj_create_from_database: Open of /rds/general/user/cb2714/home/miniforge3/envs/wwa/share/proj failed


## NAO

Found one reference on how to standardize - https://crudata.uea.ac.uk/cru/data/nao/CRU_index_calculation.pdf:  
"Make a timeseries of seasonal averages from the monthly values (e.g., DJF or DJFM), then calculate the mean and standard deviation of those values that lie in your chosen reference period (doesn’t need to be 1951 to 1980, can be whatever period suits your purposes)"

This suggests that the seasonal (DJF-mean) series itself should be normalised, rather than averaging monthly values that have already been normalised.

### ERA5 from MSLP

In [None]:
# MSLP fields to calculate manually
! wget https://climexp.knmi.nl/dyn_links/era5_msl_-28--16E_36-70N_-999-lastyear_su.nc -O data/msl_era5_-28--16E_36-70N.nc

clear_output(wait = False)

In [178]:
# open without automatic time decoding, then decode the time axis manually and wrap/sort the coordinates
mslp = xr.open_dataset("data/msl_era5_-28--16E_36-70N.nc", decode_times = False)
mslp = decode_times(mslp)
mslp = wrap_lon(mslp)

In [179]:
# area-weighted mean MSLP over the northern box [25W-16W, 63N-70N] ...
slp_n = awmean(mslp.msl.sel(lat = slice(63,70), lon = slice(-25,-16)))
# ... and over the southern box [28W-20W, 36N-40N]
slp_s = awmean(mslp.msl.sel(lat = slice(36,40), lon = slice(-28,-20)))

# raw (un-normalised) index: southern minus northern pressure
nao_raw = slp_s - slp_n
nao_raw.to_dataframe().to_csv("data/nao-stn_monthly_era5.csv")

In [180]:
# display the raw ERA5 index as a quick check
nao_raw

### Pre-computed indices

In [167]:
# NOAA monthly NAO index in its ".ascii.table" form
# NOTE(review): the next download cell writes the ".ascii" version to this same output path,
# so this file is overwritten when the notebook is run top to bottom - confirm whether this cell is still needed
! wget https://www.cpc.ncep.noaa.gov/products/precip/CWlink/pna/norm.nao.monthly.b5001.current.ascii.table -O data/nao-pc_monthly_noaa.txt

--2026-02-16 17:01:11--  https://www.cpc.ncep.noaa.gov/products/precip/CWlink/pna/norm.nao.monthly.b5001.current.ascii.table
Resolving www.cpc.ncep.noaa.gov (www.cpc.ncep.noaa.gov)... 2a0c:5bc0:4c:ff::8c5a:6513, 140.90.101.19
connected. to www.cpc.ncep.noaa.gov (www.cpc.ncep.noaa.gov)|2a0c:5bc0:4c:ff::8c5a:6513|:443... 
HTTP request sent, awaiting response... 200 OK
Length: 6863 (6.7K) [text/vnd.ascii-art]
Saving to: ‘data/nao-pc_monthly_noaa.txt’


2026-02-16 17:01:12 (3.83 MB/s) - ‘data/nao-pc_monthly_noaa.txt’ saved [6863/6863]



In [None]:
# pre-computed NAO indices: NOAA monthly, NOAA daily and UCAR station-based monthly
! wget https://www.cpc.ncep.noaa.gov/products/precip/CWlink/pna/norm.nao.monthly.b5001.current.ascii -O data/nao-pc_monthly_noaa.txt
! wget https://downloads.psl.noaa.gov/Public/map/teleconnections/nao.reanalysis.t10trunc.1948-present.txt -O data/nao-pc_daily_noaa.txt
! wget https://climatedataguide.ucar.edu/sites/default/files/2023-07/nao_station_monthly.txt -O data/nao-stn_monthly_ucar.txt

# discard the wget progress output
clear_output(wait = False)

In [70]:
# monthly from PC - NOAA
# whitespace-separated columns without a header row: year, month, index value
nao = pd.read_csv(
    "data/nao-pc_monthly_noaa.txt",
    sep = " +",
    header = None,
    names = ["year", "month", "nao"],
    engine = "python",
)

# index each value by the first day of its month
nao = nao.assign(day = 1)
nao.index = pd.to_datetime(nao[["year", "month", "day"]])

nao.loc[:, ["nao"]].to_csv("data/nao-pc_monthly_noaa.csv")

In [None]:
# daily from PC - NOAA
# whitespace-separated columns without a header row: year, month, day, index value
nao = pd.read_csv(
    "data/nao-pc_daily_noaa.txt",
    sep = " +",
    header = None,
    names = ["year", "month", "day", "nao"],
    engine = "python",
)
date_parts = nao[["year", "month", "day"]]
nao.index = pd.to_datetime(date_parts)

In [186]:
# monthly from stations - UCAR
# skip the first line of the file, then label the twelve value columns with their month numbers
nao = pd.read_csv("data/nao-stn_monthly_ucar.txt", sep = " +", engine = "python", skiprows = 1)
nao.columns = range(1,13)

# -999 marks missing values
nao = nao.mask(nao == -999)

# wide table -> long table with one row per (month, year)
nao = nao.unstack().reset_index()
nao = nao.rename(columns = {"level_1" : "year", "level_0" : "month", 0 : "nao"})

# index each value by the first day of its month, drop the gaps and put in time order
nao["day"] = 1
nao.index = pd.to_datetime(nao[["year", "month", "day"]])
nao = nao.dropna(axis = 0, how = "any").sort_index()[["nao"]]

nao.to_csv("data/nao-stn_monthly_ucar.csv")

### DJF time series for ClimExp

#### NOAA (PC)

In [73]:
# csv for climate explorer
csv_fnm = "med-storms_nao-djf_noaa-pc"
csv_fullpath = "ts/"+csv_fnm+".dat"
if len(csv_fnm) > 61: print("! Filename too long: ", csv_fnm)

# create header lines for upload to Climate Explorer 
str1 = "# contact :: December-February mean of NAO index - Mediterranean Storms, c.barnes22@imperial.ac.uk"
str2 = "# nao [] DJF NAO from https://www.cpc.ncep.noaa.gov/products/precip/CWlink/pna/norm.nao.monthly.b5001.current.ascii at "+csv_fnm+".dat"
head = "# year nao"

# make .dat file
! echo "$str1 " > $csv_fullpath
! echo "$str2" >> $csv_fullpath
! echo "$head" >> $csv_fullpath
nao_djf[["nao"]].to_csv(csv_fullpath, sep = " ", mode = "a", header = False)

#### ERA5

In [None]:
# DJF mean of the raw ERA5 index, labelled by the year in which each season ends
nao = pd.read_csv("data/nao-stn_monthly_era5.csv", index_col = 0, parse_dates = [0])
nao_djf = nao.loc[nao.index.month.isin([12,1,2])].resample("YE-FEB").mean()
nao_djf.index = nao_djf.index.year

# normalise to have mean 0 and variance 1
# (the index now holds integer years, so the 1990-2020 reference period is selected with integer labels)
ref = nao_djf.loc[1990:2020]
nao_djf = (nao_djf - ref.mean()) / ref.std()

In [44]:
# csv for climate explorer
csv_fnm = "med-storms_nao-djf_era5-stn"
csv_fullpath = "ts-obs/"+csv_fnm+".dat"
if len(csv_fnm) > 61: print("! Filename too long: ", csv_fnm)

# create header lines for upload to Climate Explorer 
str1 = "# contact :: normalised December-February mean of NAO index from ERA5 MSLP - Mediterranean Storms, c.barnes22@imperial.ac.uk"
str2 = "# nao [] DJF NAO: difference between area-weighted mean MSLP in [28W-20W, 36N-40N] and [25W-16W, 63N-70N], normalised to have mean 0 and variance 1 from 1990-2020, at "+csv_fnm+".dat"
head = "# year nao"

# make .dat file
! echo "$str1 " > $csv_fullpath
! echo "$str2" >> $csv_fullpath
! echo "$head" >> $csv_fullpath
nao_djf.to_csv(csv_fullpath, sep = " ", mode = "a", header = False)

# CORDEX

## Spatial pattern & daily time series

In [None]:
fpath = "/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/mediterranean-storms/cordex"
fl = sorted(glob.glob(fpath+"/pr/*"))

for fnm in fl:
    print(fnm)
    da = xr.open_dataset(fnm).pr
    base = fnm.split("/")[-1]

    # names of the horizontal dimensions differ between models
    if "rlat" in da.dims:
        xdim, ydim = "rlon", "rlat"
    elif "x" in da.dims:
        xdim, ydim = "x", "y"
    else:
        print("Unknown xydims")
        continue

    # get spatial pattern: 1990-2020 mean field for each calendar month
    pr_spatial = da.sel(time = slice("1990","2020")).groupby("time.month").mean()
    pr_spatial.to_netcdf(f"{fpath}/pr-mmean{base[2:-20]}1990-2020.nc")

    # get daily time series for each region, skipping any that already exist
    for rnm, sf in (("n", sf_n), ("s", sf_s)):
        new_fnm = f"{fpath}/pr-{rnm}{base[2:]}"
        if os.path.exists(new_fnm):
            continue

        rm = regionmask.mask_geopandas(sf, da)
        ts = da.where(rm == 0, drop = True).mean([xdim, ydim])
        ts.to_netcdf(new_fnm)

## Time series for climate explorer

### rx1day

In [None]:
for rnm in ["n", "s"]:

    rnm_long = {"n" : "northern", "s" : "southern"}[rnm]

    fl = glob.glob("/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/mediterranean-storms/cordex/pr-"+rnm+"_*.nc")
    for fnm in fl:

        ds_nm = "_".join(fnm.split("_")[4:-1])
        print(ds_nm)
        
        ts = xr.open_dataset(fnm).pr
        rx1day = ts.sel(time = ts.time.dt.month.isin([10,11,12,1,2,3])).resample(time = "YE-JUL").max()
        df = rx1day.assign_coords(time = rx1day.time.dt.year).rename(time = "year").to_dataframe()

        # csv for climate explorer
        csv_fnm = "med-storms_rx1day-ondjfm_"+rnm+"_"+ds_nm
        csv_fullpath = "ts-cordex/"+csv_fnm+".dat"
        if len(csv_fnm) > 61: print("! Filename too long: ", csv_fnm)
        
        # create header lines for upload to Climate Explorer 
        str1 = "# contact :: CORDEX "+ds_nm+" Rx1day-ONDJFM precipitation ("+rnm_long+" region) - Mediterranean storms, c.barnes22@imperial.ac.uk"
        str2 = "# rx1day [mm/day] Annual maximum of October-March daily precipitation (labelled at end of season) averaged over "+rnm_long+" region at "+csv_fnm+".dat"
        head = "# year rx1day"

        # make .dat file
        ! echo "$str1 " > $csv_fullpath
        ! echo "$str2" >> $csv_fullpath
        ! echo "$head" >> $csv_fullpath
        df.to_csv(csv_fullpath, sep = " ", mode = "a", header = False)

clear_output(wait = False)
print("Done.")

### rx90day

In [5]:
for rnm in ["n", "s"]:

    rnm_long = {"n" : "northern", "s" : "southern"}[rnm]

    fl = glob.glob("/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/mediterranean-storms/cordex/pr-"+rnm+"_*.nc")
    for fnm in fl:
        
        ds_nm = "_".join(fnm.split("_")[4:-1])
        print(ds_nm)
        
        ts = xr.open_dataset(fnm).pr.rolling(time = 90).mean().resample(time = "YE-JUL").mean()
        df = ts.assign_coords(time = ts.time.dt.year).rename(time = "year").to_dataframe()

        # csv for climate explorer
        csv_fnm = "med-storms_rx90day_"+rnm+"_"+ds_nm
        csv_fullpath = "ts-cordex/"+csv_fnm+".dat"
        if len(csv_fnm) > 61: print("! Filename too long: ", csv_fnm)

        # create header lines for upload to Climate Explorer 
        str1 = "# contact :: CORDEX "+ds_nm+" Rx90day precipitation ("+rnm_long+" region) - Mediterranean storms, c.barnes22@imperial.ac.uk"
        str1 = "# contact :: "+ds_nm.upper()+" Rx90day ("+rnm_long+" region) - Mediterranean storms, c.barnes22@imperial.ac.uk"
        str2 = "# rx90day [mm/day] Annual maximum of October-March daily precipitation (labelled at end of season) averaged over "+rnm_long+" region at "+csv_fnm+".dat"
        head = "# year rx90day"

        # make .dat file
        ! echo "$str1 " > $csv_fullpath
        ! echo "$str2" >> $csv_fullpath
        ! echo "$head" >> $csv_fullpath
        df.to_csv(csv_fullpath, sep = " ", mode = "a", header = False)

EUR-11_HadGEM2-ES_r1i1p1_REMO2015
EUR-11_HadGEM2-ES_r1i1p1_ALADIN63
EUR-11_CNRM-CM5_r1i1p1_RACMO22E
EUR-11_EC-EARTH_r12i1p1_HIRHAM5
EUR-11_HadGEM2-ES_r1i1p1_CCLM4-8-17
EUR-11_HadGEM2-ES_r1i1p1_HIRHAM5
EUR-11_HadGEM2-ES_r1i1p1_RCA4
EUR-11_CNRM-CM5_r1i1p1_ALARO-0
EUR-11_IPSL-CM5A-MR_r1i1p1_REMO2015
EUR-11_EC-EARTH_r3i1p1_RCA4
EUR-11_CNRM-CM5_r1i1p1_RCA4
EUR-11_EC-EARTH_r12i1p1_COSMO-crCLIM-v1-1
! Filename too long:  med-storms_rx90day_n_EUR-11_EC-EARTH_r12i1p1_COSMO-crCLIM-v1-1
EUR-11_NorESM1-M_r1i1p1_HadREM3-GA7-05
EUR-11_HadGEM2-ES_r1i1p1_HadREM3-GA7-05
EUR-11_IPSL-CM5A-MR_r1i1p1_RACMO22E
EUR-11_EC-EARTH_r12i1p1_RegCM4-6
EUR-11_NorESM1-M_r1i1p1_COSMO-crCLIM-v1-1
! Filename too long:  med-storms_rx90day_n_EUR-11_NorESM1-M_r1i1p1_COSMO-crCLIM-v1-1
EUR-11_MPI-ESM-LR_r1i1p1_HadREM3-GA7-05
EUR-11_MPI-ESM-LR_r3i1p1_COSMO-crCLIM-v1-1
! Filename too long:  med-storms_rx90day_n_EUR-11_MPI-ESM-LR_r3i1p1_COSMO-crCLIM-v1-1
EUR-11_CNRM-CM5_r1i1p1_WRF381P
EUR-11_MPI-ESM-LR_r2i1p1_RCA4
EUR-11_CNRM-CM

### NAO

In [84]:
gcm_list = list(set(["_".join(fnm.split("_")[-3:-1]) for fnm in glob.glob("ts-cordex/med-storms_rx1day-ondjfm_s_*.dat")]))
for gcm in gcm_list:
    
    fl = glob.glob("/rds/general/user/cb2714/home/00_WWA_project_folder/live/data/cmip5_covariates/cmip5-nao/nao_Amon_"+gcm+"*.nc")
    if len(fl) == 0: continue

    nao = xr.open_dataset(fl[0]).nao.convert_calendar("standard", align_on = "year").to_dataframe()
    nao_djf = nao.loc[[m in [12,1,2] for m in nao.index.month]].resample("YE-FEB").mean()
    nao_djf.index = nao_djf.index.year

    # normalise to have mean 0 and variance 1
    nao_djf = (nao_djf - nao_djf.loc[slice("1990","2020")].mean()) / nao_djf.loc[slice("1990","2020")].std()

    # remove first & last years, which only have partial seasons
    nao_djf = nao_djf.iloc[slice(1,-1)]

    # csv for climate explorer
    csv_fnm = "med-storms_nao-djf_cmip5_"+gcm
    csv_fullpath = "ts-cordex/"+csv_fnm+".dat"
    if len(csv_fnm) > 61: print("! Filename too long: ", csv_fnm)
    
    # create header lines for upload to Climate Explorer 
    str1 = "# contact :: normalised December-February mean of NAO index from CMIP5 "+gcm+" - Mediterranean Storms, c.barnes22@imperial.ac.uk"
    str2 = "# nao [] DJF NAO: difference between area-weighted mean MSLP in [28W-20W, 36N-40N] and [25W-16W, 63N-70N], normalised to have mean 0 and variance 1 from 1990-2020, at "+csv_fnm+".dat"
    head = "# year nao"

    # make .dat file
    ! echo "$str1 " > $csv_fullpath
    ! echo "$str2" >> $csv_fullpath
    ! echo "$head" >> $csv_fullpath
    nao_djf.to_csv(csv_fullpath, sep = " ", mode = "a", header = False)


# HighResMIP

## Time series for climate explorer

### rx1day

In [60]:
for rnm in ["n", "s"]:

    rnm_long = {"n" : "northern", "s" : "southern"}[rnm]

    fl = glob.glob("/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/mediterranean-storms/highresmip/ts-"+rnm_long[:5]+"/*.nc")
    for fnm in fl:
        ds_nm = fnm.split("_")[-4]
        print(ds_nm)

        ts = xr.open_dataset(fnm).pr * 86400
        rx1day = ts.sel(time = ts.time.dt.month.isin([10,11,12,1,2,3])).resample(time = "YE-JUL").max()
        df = rx1day.assign_coords(time = rx1day.time.dt.year).rename(time = "year").to_dataframe()

        # csv for climate explorer
        csv_fnm = "med-storms_rx1day-ondjfm_"+rnm+"_"+ds_nm
        csv_fullpath = "ts-highresmip/"+csv_fnm+".dat"
        if len(csv_fnm) > 61: print("! Filename too long: ", csv_fnm)
        
        # create header lines for upload to Climate Explorer 
        str1 = "# contact :: HighResMIP "+ds_nm+" (SST-forced) Rx1day-ONDJFM precipitation ("+rnm_long+" region) - Mediterranean storms, c.barnes22@imperial.ac.uk"
        str2 = "# rx1day [mm/day] Annual maximum of October-March daily precipitation (labelled at end of season) averaged over "+rnm_long+" region at "+csv_fnm+".dat"
        head = "# year rx1day"

        # make .dat file
        ! echo "$str1 " > $csv_fullpath
        ! echo "$str2" >> $csv_fullpath
        ! echo "$head" >> $csv_fullpath
        df.to_csv(csv_fullpath, sep = " ", mode = "a", header = False)
    clear_output(wait = False)
print("Done.")

Done.


### rx90day

In [61]:
for rnm in ["n", "s"]:

    rnm_long = {"n" : "northern", "s" : "southern"}[rnm]

    fl = glob.glob("/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/mediterranean-storms/highresmip/ts-"+rnm_long[:5]+"/*.nc")
    for fnm in fl:
        
        ds_nm = fnm.split("_")[-4]
        print(ds_nm)
        
        ts = xr.open_dataset(fnm).pr.rolling(time = 90).mean().resample(time = "YE-JUL").mean() * 86400
        df = ts.assign_coords(time = ts.time.dt.year).rename(time = "year").to_dataframe()

        # csv for climate explorer
        csv_fnm = "med-storms_rx90day_"+rnm+"_"+ds_nm
        csv_fullpath = "ts-highresmip/"+csv_fnm+".dat"
        if len(csv_fnm) > 61: print("! Filename too long: ", csv_fnm)

        # create header lines for upload to Climate Explorer 
        str1 = "# contact :: HighResMIP "+ds_nm+" (SST-forced) Rx90day precipitation ("+rnm_long+" region) - Mediterranean storms, c.barnes22@imperial.ac.uk"
        str1 = "# contact :: "+ds_nm.upper()+" Rx90day ("+rnm_long+" region) - Mediterranean storms, c.barnes22@imperial.ac.uk"
        str2 = "# rx90day [mm/day] Annual maximum of October-March daily precipitation (labelled at end of season) averaged over "+rnm_long+" region at "+csv_fnm+".dat"
        head = "# year rx90day"

        # make .dat file
        ! echo "$str1 " > $csv_fullpath
        ! echo "$str2" >> $csv_fullpath
        ! echo "$head" >> $csv_fullpath
        df.to_csv(csv_fullpath, sep = " ", mode = "a", header = False)
    clear_output(wait = False)
print("Done.")

Done.


### NAO

In [69]:
# unique model names: the last "_"-separated field of each southern rx1day file name, minus ".dat"
rx1day_files = glob.glob("ts-highresmip/med-storms_rx1day-ondjfm_s_*.dat")
gcm_list = list({fnm.split("_")[-1][:-4] for fnm in rx1day_files})

In [71]:
for gcm in gcm_list:
    
    fl = glob.glob("/rds/general/user/cb2714/home/00_WWA_project_folder/live/data/highresmip/NAO/*"+gcm+"_*")
    if len(fl) == 0: continue

    if len(fl) > 1:
        fl = [fp for fp in fl if "r1i1p1f1" in fp]
        if len(fl) != 1: 
            print(gcm+" has multiple variants but no r1i1p1f1")
            continue

    nao = xr.open_dataset(fl[0]).nao.convert_calendar("standard", align_on = "year").to_dataframe()
    nao_djf = nao.loc[[m in [12,1,2] for m in nao.index.month]].resample("YE-FEB").mean()
    nao_djf.index = nao_djf.index.year

    # normalise to have mean 0 and variance 1
    nao_djf = (nao_djf - nao_djf.loc[slice("1990","2020")].mean()) / nao_djf.loc[slice("1990","2020")].std()

    # remove first & last years, which only have partial seasons
    nao_djf = nao_djf.iloc[slice(1,-1)]

    # csv for climate explorer
    csv_fnm = "med-storms_nao-djf_"+gcm
    csv_fullpath = "ts-highresmip/"+csv_fnm+".dat"
    if len(csv_fnm) > 61: print("! Filename too long: ", csv_fnm)
    
    # create header lines for upload to Climate Explorer 
    str1 = "# contact :: normalised December-February mean of NAO index from HighResMIP SST-forced "+gcm+" "+fl[0].split("_")[-3]+" - Mediterranean Storms, c.barnes22@imperial.ac.uk"
    str2 = "# nao [] DJF NAO: difference between area-weighted mean MSLP in [28W-20W, 36N-40N] and [25W-16W, 63N-70N], normalised to have mean 0 and variance 1 from 1990-2020, at "+csv_fnm+".dat"
    head = "# year nao"

    # make .dat file
    ! echo "$str1 " > $csv_fullpath
    ! echo "$str2" >> $csv_fullpath
    ! echo "$head" >> $csv_fullpath
    nao_djf.to_csv(csv_fullpath, sep = " ", mode = "a", header = False)