In [1]:
import sys; sys.path.append('..'); from wwa import *

# Bounds of the full domain: xn/xx are used as the longitude slice limits and
# yn/yx as the latitude slice limits when data are subset in later cells
xn, xx = 27, 41
yn, yx = -13, 2

# Bounds of the smaller rectangle used to build the "box" region polygon
rxn, rxx = 34.5, 38
ryn, ryx = -2, 1

# Plate Carree projection object (presumably for map plotting; not used in the cells shown here)
mapproj = cartopy.crs.PlateCarree()

# Shapefile read from the local "sf" path
sf = gpd.read_file("sf")

In [37]:
# One GeoDataFrame per study region. The variable names follow the pattern
# sf_<key>, where <key> is a key of sf_names: later cells find them by that name.
sf_lv = gpd.read_file("sf_lvictoria")
sf_lt = gpd.read_file("sf_ltanganyika")
sf_tc = gpd.read_file("sf_tanzaniaCoast")
sf_ch = gpd.read_file("sf_centralHighlands")
sf_vt = gpd.read_file("sf_lvictoriaTanganyika")
sf_org = gpd.read_file("sf")

# Rectangular region: closed ring through the four corners given by rxn/rxx/ryn/ryx
box_lons = [rxn, rxn, rxx, rxx, rxn]
box_lats = [ryn, ryx, ryx, ryn, ryn]
box_attrs = pd.DataFrame(data = {"region" : ["box"]}, index = [0])
sf_box = gpd.GeoDataFrame(box_attrs, geometry = [Polygon(zip(box_lons, box_lats))], crs = "EPSG:4326")

# Descriptive region names used in file headers, keyed by region code
# (insertion order is the order in which the regions are processed)
sf_names = {
    "lv" : "Lake Victoria basin",
    "lt" : "Lake Tanganyika basin",
    "tc" : "southeast Tanzania",
    "ch" : "Central Highlands to coast",
    "box" : "Central Highlands",
    "vt" : "Lake Victoria & Tanganyika basins",
    "org" : "Tanzania/Burundi/SW Kenya",
}

# HighResMIP

## Extract data over the region

In [56]:
# Precipitation directories matched by the pattern below (one ".../<name>/pr/" per match), in sorted order
ml = glob.glob("/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/synda_Mariam/data/highresmip/*/pr/")
ml.sort()

# Directory that receives the regional subsets written by the extraction loop
outpath = "/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/eastAfrica_floods/highresmip/"

In [57]:
# For every directory in ml: concatenate the "present" and "future" files, cut out
# the study domain with a margin of 2 in lon/lat, and save one netCDF per model.
# Models whose output file already exists are skipped.
for mpath in ml:

    # mpath ends in ".../<model>/pr/", so the model directory is the third-from-last field
    print(mpath.split("/")[-3])
    fl_hist = sorted(glob.glob(mpath+"*present*[0-9].nc"))
    fl_fut = sorted(glob.glob(mpath+"*future*[0-9].nc"))

    # the output name and the overlap filter are both derived from the "present"
    # files, so a directory without any cannot be processed
    if not fl_hist:
        print("! No 'present' files found in "+mpath+" - skipping")
        continue

    # filter file list to remove duplicated years: keep only "future" files that
    # start after the last "present" file ends
    # (assumes file names end in <start>-<end>.nc with 8-character dates - TODO confirm)
    last_hist = fl_hist[-1][-11:-3]
    fl_fut = [fnm for fnm in fl_fut if fnm[-20:-12] > last_hist]

    # build the output name from the fields of the file NAME only, so the result
    # does not depend on how many underscores the directory path happens to contain
    # (field 2 follows the "pr_day_" prefix; the last two fields before the dates are appended)
    fields = os.path.basename(fl_hist[0]).split("_")
    new_fnm = outpath + "pr_day_" + "_".join(fields[2:3] + fields[-3:-1]) + ".nc"

    if os.path.exists(new_fnm): continue

    # close the multi-file dataset as soon as the subset has been written
    with xr.open_mfdataset(fl_hist + fl_fut) as mf:
        da = mf.pr.sel(lon = slice(xn-2,xx+2), lat = slice(yn-2,yx+2))
        da.to_netcdf(new_fnm)

clear_output(wait = False)
print("Done.")

In [61]:
# time series, spatial & seasonal patterns
# For each regional subset in outpath: save the 1990-2020 mean MAM precipitation map,
# then for every region save the mean seasonal cycle and a .dat file of MAM maxima
# of n-day accumulated precipitation.
for fnm in glob.glob(outpath+"*.nc"):

    # identifier built from the third- and second-from-last "_"-separated fields of the path
    mdl = "_".join(fnm.split("_")[-3:-1])
    da = convert_units_to(xr.open_dataset(fnm).pr, "mm/day")

    # nothing to do for runs without any data in the 1990-2020 reference period
    if len(da.sel(time = slice("1990","2020")).time) == 0: continue

    # spatial pattern: seasonal totals (quarters starting in December), MAM only, averaged over 1990-2020
    sp = da.resample(time = "QS-DEC").sum().groupby("time.season")["MAM"].sel(time = slice("1990", "2020")).mean("time")
    sp.to_netcdf("data/highresMIP/"+re.sub("day", "spatial", fnm.split("/")[-1]))

    # daily time series
    for sf_nm in list(sf_names.keys()):
        # fetch the GeoDataFrame called sf_<key> from the notebook namespace
        # (plain lookup instead of eval()-ing a constructed string)
        sf = globals()["sf_"+sf_nm]
        rm = regionmask.mask_geopandas(sf, da.lon, da.lat)
        # average over cells whose mask value is 0
        # (presumably each shapefile holds a single feature numbered 0 - TODO confirm)
        ts = da.where(rm == 0).mean(["lat", "lon"])

        # seasonal cycle: 1990-2020 mean for each day of the year
        sc = ts.sel(time = slice("1990","2020")).groupby("time.dayofyear").mean()
        sc.to_netcdf("data/highresMIP/"+re.sub("_day", "-"+sf_nm+"_seasonal-cycle", fnm.split("/")[-1]))

        # create time series for Climate Explorer
        for ndays in [30]:

            # MAM maximum of the running ndays-day sum, indexed by year
            rxnday = ts.rolling(time = ndays).sum().resample(time = "QS-DEC").max().groupby("time.season")["MAM"]
            rxnday = rxnday.assign_coords(time = rxnday.time.dt.year).rename(time = "year").to_dataframe()

            csv_fnm = "EA-floods_rx"+str(ndays)+"day-"+sf_nm+"_highresmip_"+mdl
            csv_fullpath = "ts_highresmip/"+csv_fnm+".dat"
            # warn about names longer than 61 characters (presumably an upload limit - TODO confirm)
            if len(csv_fnm) > 61: print("! Filename too long: ", csv_fnm)

            # create extra header lines for upload to Climate Explorer
            str1 = "# contact :: HighResMIP "+mdl+" MAM maxima of rx"+str(ndays)+"day - East Africa floods 2024, c.barnes22@imperial.ac.uk"
            str2 = "# rx"+str(ndays)+"day [mm] March-May maximum of "+str(ndays)+"-day accumulated precipitation over "+sf_names[sf_nm]+" at "+csv_fnm+".dat"
            head = "# year rx"+str(ndays)+"day"

            # make .dat file: three header lines, then the year/value rows.
            # The header is written with Python file I/O rather than shell echo, so the
            # output does not depend on a shell or on how the header text is quoted.
            with open(csv_fullpath, "w") as dat_file:
                dat_file.write(str1+" \n")  # trailing space matches the previously generated files
                dat_file.write(str2+"\n")
                dat_file.write(head+"\n")
            rxnday.to_csv(csv_fullpath, sep = " ", mode = "a", header = False)

            clear_output(wait = False)

# Gridded observational products

In [48]:
# Select ONE gridded precipitation product: ds is the short label used in output
# file names, da the precipitation field renamed to "pr". The alternatives are
# kept commented out; swap which one is active to process a different product.
# ds = "chirps"; da = wrap_lon(xr.open_dataset("/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/eastAfrica_floods/precip_chirps_1981-2024_EA.nc")).precip.rename("pr", longitude = "lon", latitude = "lat")
# ds = "tamsat"; da = wrap_lon(xr.open_dataset("/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/eastAfrica_floods/precip_tamsat_1983-2024_EA.nc")).rfe.rename("pr")
ds = "mswep"
mswep_all = xr.open_mfdataset("/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/eastAfrica_floods/MSWEP/mswep_EA_*.nc")
da = wrap_lon(mswep_all).precipitation.rename("pr")
# ds = "cpc"; da = wrap_lon(xr.open_mfdataset("/rds/general/user/cb2714/home/00_WWA_project_folder/live/data/cpc/precip*.nc")).precip.rename("pr")

In [49]:
# Daily time series from shapefiles
for sf_nm in list(sf_names.keys()):
    sf = eval("sf_"+sf_nm)
    rm = regionmask.mask_geopandas(sf, da.lon, da.lat)
    ts = da.where(rm == 0).mean(["lat", "lon"])
    ts.to_netcdf("data/daily-ts_"+sf_nm+"_"+ds+".nc")

In [12]:
# gridded n-day seasonal maxima
# The spatial subset does not depend on ndays, so it is taken once before the loop
# (selecting the same slices again on a re-run leaves da unchanged)
da = da.sel(lon = slice(xn,xx), lat = slice(yn,yx))

for ndays in [30, 60]:
    # MAM maximum of the trailing ndays-day running sum at every grid cell
    rxnday = da.rolling(time = ndays, center = False).sum().resample(time = "QS-DEC").max().groupby("time.season")["MAM"]
    rxnday.to_netcdf("data/rx"+str(ndays)+"day-MAM_gridded_"+ds+".nc")

## Time series for Climate Explorer

In [50]:
# For every region and observational product, turn the saved daily regional series
# into a yearly series of MAM maxima of n-day accumulated precipitation and write
# it as a .dat file with three header lines.
datasets = ["cpc", "mswep", "chirps", "tamsat"]
for ndays in [30]:
    for region in list(sf_names.keys()):
        for ds in datasets:

            # MAM maximum of the trailing ndays-day running sum, indexed by year
            ts = xr.open_dataset("data/daily-ts_"+region+"_"+ds+".nc").pr
            ts = ts.rolling(time = ndays, center = False).sum().resample(time = "QS-DEC").max().groupby("time.season")["MAM"]
            ts = ts.assign_coords(time = ts.time.dt.year).rename(time = "year").to_dataframe()

            csv_fnm = "EA-floods_rx"+str(ndays)+"day_obs-"+region+"_"+ds
            csv_fullpath = "ts_obs/"+csv_fnm+".dat"

            # warn about names longer than 61 characters (presumably an upload limit - TODO confirm)
            if len(csv_fnm) > 61: print("! Filename too long: ", csv_fnm)

            rnm = sf_names[region]

            # create extra header lines for upload to Climate Explorer
            str1 = "# contact :: "+ds.upper()+" MAM maxima of rx"+str(ndays)+"day averaged over "+rnm+" - East Africa floods 2024, c.barnes22@imperial.ac.uk"
            str2 = "# rx"+str(ndays)+"day [mm] March-May maxima of "+str(ndays)+"-day accumulated precipitation at "+csv_fnm+".dat"
            head = "# year rx"+str(ndays)+"day"

            # make .dat file: three header lines, then the year/value rows.
            # The header is written with Python file I/O rather than shell echo, so the
            # output does not depend on a shell or on how the header text is quoted.
            with open(csv_fullpath, "w") as dat_file:
                dat_file.write(str1+" \n")  # trailing space matches the previously generated files
                dat_file.write(str2+"\n")
                dat_file.write(head+"\n")
            ts.to_csv(csv_fullpath, sep = " ", mode = "a", header = False)

            clear_output(wait = False)

# MAM climatology 1990-2020

In [10]:
# Select ONE gridded precipitation product for the climatology: ds is the short
# label used in the output file name, da the precipitation field renamed to "pr".
# The alternatives are kept commented out; swap which one is active as needed.
# ds = "chirps"; da = wrap_lon(xr.open_dataset("/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/eastAfrica_floods/precip_chirps_1981-2024_EA.nc")).precip.rename("pr", longitude = "lon", latitude = "lat")
# ds = "tamsat"; da = wrap_lon(xr.open_dataset("/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/eastAfrica_floods/precip_tamsat_1983-2024_EA.nc")).rfe.rename("pr")
# ds = "mswep"; da = wrap_lon(xr.open_mfdataset("/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/eastAfrica_floods/MSWEP/mswep_EA_*.nc")).precipitation.rename("pr")
ds = "cpc"
cpc_all = xr.open_mfdataset("/rds/general/user/cb2714/home/00_WWA_project_folder/live/data/cpc/precip*.nc")
da = wrap_lon(cpc_all).precip.rename("pr")

In [None]:
# Seasonal precipitation totals (quarters starting in December), MAM seasons only
seasonal_totals = da.resample(time = "QS-DEC").sum()
mam_totals = seasonal_totals.groupby("time.season")["MAM"]

# 1990-2020 mean of the MAM totals
clim = mam_totals.sel(time = slice("1990", "2020")).mean("time")

# blank out cells whose climatological total is not strictly positive, then save
clim = clim.where(clim > 0)
clim.to_netcdf("data/mam-climatology_"+ds+".nc")

## Flatten gridded data for trend fitting

In [53]:
# gridded n-day seasonal maxima over study region only
# For each product: index the gridded MAM maxima by year, save a time-mean template
# map, then flatten the grid to a (year x cell) table and write it to .csv in pieces
# of at most max_cols columns.
ndays = 30
max_cols = 2500   # maximum number of grid-cell columns per .csv file

# NOTE(review): the [1:] slice leaves "chirps" out of this loop - confirm that is intentional
for ds in ["chirps", "cpc", "mswep", "tamsat"][1:]:

    da = xr.open_dataset("data/rx"+str(ndays)+"day-MAM_gridded_"+ds+".nc").pr
    da = da.assign_coords(time = da.time.dt.year).rename(time = "year")

    if ds in ["mswep"]:
        # mask out the sea
        rm = regionmask.defined_regions.natural_earth_v5_0_0.land_10.mask(da.lon, da.lat)
        da = da.where(rm == 0)

    # save the map for easier reconstruction later
    da.mean("year").to_netcdf("data/map-tmplt_"+ds+".nc")

    # flatten & save as .csv for transfer to R
    # (cells that are missing in every year are dropped; `how` is passed by keyword
    # because newer xarray versions no longer accept it positionally)
    da_df = da.stack(xy = ["lat", "lon"]).dropna("xy", how = "all").to_pandas()

    n_cols = da_df.shape[1]
    if n_cols > max_cols:
        # split into numbered files (_01, _02, ...) of max_cols columns each
        for i in range(int(np.ceil(n_cols / max_cols))):
            da_df.iloc[:,slice(i*max_cols,(i+1)*max_cols)].to_csv("data/flattened/rx"+str(ndays)+"days-flattened_"+ds+"_"+str(i+1).rjust(2,"0")+".csv")
    else:
        da_df.to_csv("data/flattened/rx"+str(ndays)+"days-flattened_"+ds+".csv")
    

## Reshape dataframe into maps

In [17]:
warnings.filterwarnings("ignore", category = FutureWarning) # otherwise there will be a LOT of warnings

def vec2map(x, mask):
    """
    Reshape a vector of per-cell values back onto a map.

    Parameters
    ----------
    x : sequence of values (list, array or pandas Series), one per non-missing
        cell of `mask`. Values are taken by position, in the row-major order in
        which the non-missing cells occur in `mask`.
    mask : template map exposing `.values` (a float array in which NaN marks cells
        to leave empty) and `.copy(data=...)`, e.g. an xarray DataArray.

    Returns
    -------
    A copy of `mask` whose non-missing cells hold the values of `x` and whose
    missing cells are still NaN; or None (after printing a message) when the
    number of values does not match the number of non-missing cells.
    """
    # cells of the template that can receive a value
    valid = ~np.isnan(mask.values)
    n_cells = int(valid.sum())

    if len(x) != n_cells:
        print(str(len(x))+" values, but "+str(n_cells)+" cells")
        return

    # Boolean-mask assignment fills the selected cells in row-major order, which is
    # the order a cell-by-cell loop over np.argwhere(valid) would use; doing it in
    # one step avoids a Python-level loop with label-ambiguous x[i] lookups.
    filled = np.array(mask.values, copy = True)
    filled[valid] = np.asarray(x)

    # same coordinates/metadata as the template, new data
    return mask.copy(data = filled)

In [18]:
# Rebuild gridded maps from the tabulated results, one netCDF per dataset.
# Datasets whose output file already exists are left alone.
ndays = 30
datasets = ["tamsat", "chirps", "cpc", "mswep"]
for ds in datasets:

    new_fnm = "res/res-gridded_rx"+str(ndays)+"day_"+ds+".nc"
    if not os.path.exists(new_fnm):

        # load map to be used to reshape
        tmplt = xr.open_dataset("data/map-tmplt_"+ds+".nc").pr

        # read every results table for this dataset (in sorted file order) and combine
        # them with merge_byindex (helper defined elsewhere; presumably joins on the index)
        res_files = sorted(glob.glob("data/gridded-res/rx"+str(ndays)+"day*_"+ds+"*.csv"))
        df = merge_byindex([pd.read_csv(res_fnm, index_col = 0) for res_fnm in res_files])

        # one map per row of the table, each named after its row label
        res_maps = [vec2map(df.loc[r], tmplt).rename(r) for r in df.index]
        mdl_res = xr.merge(res_maps)
        mdl_res.to_netcdf(new_fnm)

# Covariates

## MAM IOD

In [45]:
# Open the file without time decoding, decode it with the decode_times helper,
# and take the "diff" variable under the name "IOD"
iod_raw = xr.open_dataset("data/idmi_ersst.nc", decode_times = False)
iod = decode_times(iod_raw)["diff"].rename("IOD")

# Seasonal means (quarters starting in December), MAM only, indexed by year
iod_seasonal = iod.resample(time = "QS-DEC").mean()
iod_mam = iod_seasonal.groupby("time.season")["MAM"]
iod_mam = iod_mam.assign_coords(time = iod_mam.time.dt.year).rename(time = "year").to_dataframe()

# generic alias for the series just built
ts = iod_mam

In [29]:
# Write the MAM IOD series (iod_mam, built in the preceding cell) as a .dat file
# with three header lines.
csv_fnm = "EA-floods_iod-mam_obs_ersst"
csv_fullpath = "ts/"+csv_fnm+".dat"

# warn about names longer than 61 characters (presumably an upload limit - TODO confirm)
if len(csv_fnm) > 61: print("! Filename too long: ", csv_fnm)

# create extra header lines for upload to Climate Explorer
str1 = "# contact :: MAM IOD (ERSST) - East Africa floods 2024, c.barnes22@imperial.ac.uk"
str2 = "# iod [degC] March-May mean of IOD at "+csv_fnm+".dat"
head = "# year iod"

# make .dat file: three header lines, then the year/value rows.
# The header is written with Python file I/O rather than shell echo, so the
# output does not depend on a shell or on how the header text is quoted.
with open(csv_fullpath, "w") as dat_file:
    dat_file.write(str1+" \n")  # trailing space matches the previously generated files
    dat_file.write(str2+"\n")
    dat_file.write(head+"\n")
iod_mam.to_csv(csv_fullpath, sep = " ", mode = "a", header = False)

clear_output(wait = False)

## DJF detrended Nino3.4

In [48]:
# Build the DJF-mean series of the detrended Nino3.4 index and write it as a .dat
# file with three header lines.
nino = xr.open_dataset("../10_misc/detrended_nino34/ersst_nino34_detrended.nc").nino34

# Seasonal means (quarters starting in December), DJF only. Each DJF value is
# time-stamped in its December, so 1 is added to label it with the following year.
nino_djf = nino.resample(time = "QS-DEC").mean().groupby("time.season")["DJF"]
nino_djf = nino_djf.assign_coords(time = nino_djf.time.dt.year + 1).rename(time = "year").to_dataframe()
ts = nino_djf

csv_fnm = "EA-floods_nino-djf_obs_ersst"
csv_fullpath = "ts/"+csv_fnm+".dat"

# warn about names longer than 61 characters (presumably an upload limit - TODO confirm)
if len(csv_fnm) > 61: print("! Filename too long: ", csv_fnm)

# create extra header lines for upload to Climate Explorer
str1 = "# contact :: DJF detrended Nino3.4 (ERSST) - East Africa floods 2024, c.barnes22@imperial.ac.uk"
str2 = "# nino [degC] December-February mean of Nino3.4 detrended by subtracting mean of tropical SSTs at "+csv_fnm+".dat"
head = "# year nino"

# make .dat file: three header lines, then the year/value rows.
# The header is written with Python file I/O rather than shell echo, so the
# output does not depend on a shell or on how the header text is quoted.
with open(csv_fullpath, "w") as dat_file:
    dat_file.write(str1+" \n")  # trailing space matches the previously generated files
    dat_file.write(str2+"\n")
    dat_file.write(head+"\n")
ts.to_csv(csv_fullpath, sep = " ", mode = "a", header = False)

clear_output(wait = False)