In [2]:
import sys; sys.path.append('..'); from wwa import *
# shared cartopy PlateCarree projection object (presumably used for map plotting - not used in the cells shown here)
cproj = cartopy.crs.PlateCarree()

# bounding box (lon min, lon max, lat min, lat max) used to subset the gridded observational products
xn, xx, yn, yx = -85, -75, 6, 10

# slightly larger box (same ordering) used when cutting the climate-model output down to size
Xn, Xx, Yn, Yx = -85, -75, 5, 15

# target units for each variable, passed to convert_units_to when model data is extracted
units = dict(pr = "mm/day", tasmin = "degC", tasmax = "degC")

---
---
# Observational datasets

---
## Compile CHIRPS

In [6]:
# open every CHIRPS file lazily as one dataset, then keep only the study box before saving
chirps_all = xr.open_mfdataset("../../00_WWA_project_folder/live/data/chirps_05/*.nc")
ds = chirps_all.sel(longitude = slice(xn,xx), latitude = slice(yn, yx))
ds.to_netcdf("data/chirps05_panama.nc")

## Compile MSWEP

In [4]:
# cut out yearly slices (newest year first)
# Years whose slice already exists are skipped, so the whole range can be re-run safely;
# the loop therefore covers the full period rather than only the two most recent years.
for y in range(2024, 1978, -1):
    
    new_fnm = "mswep/mswep_panama_"+str(y)+".nc"
    if os.path.exists(new_fnm): continue
    
    # skip years for which no source files are available instead of failing inside open_mfdataset
    src_files = sorted(glob.glob("../../00_WWA_project_folder/live/data/mswep_*/"+str(y)+"*.nc"))
    if len(src_files) == 0: continue
    
    print(y)
    # latitude bounds are given as (max, min) - presumably because the source grid stores latitude north-to-south
    da = xr.open_mfdataset(src_files).sel(lon = slice(xn,xx), lat = slice(yx, yn))
    wrap_lon(da).rename(precipitation = "pr").to_netcdf(new_fnm)

2023


In [5]:
# compile into single file: open all yearly slices as one dataset and write it back out
mswep_all_years = xr.open_mfdataset("mswep/mswep_panama_*.nc")
mswep_all_years.to_netcdf("data/mswep_panama.nc")

## MSWX tmax & tmin

In [7]:
# variable to process; the section covers tmax & tmin, so presumably this cell is re-run with varnm = "tmax"
varnm = "tmin"

# cut out yearly slices, newest year first; years that were already cut are left untouched
# NOTE(review): the source path starts with "../" whereas the CHIRPS and MSWEP cells read from
# "../../00_WWA_project_folder/..." - confirm which relative path is correct
for y in reversed(range(1979, 2025)):
    
    new_fnm = "mswep/mswx_"+varnm+"_panama_"+str(y)+".nc"
    if not os.path.exists(new_fnm):
        
        print(y)
        da = xr.open_mfdataset("../00_WWA_project_folder/live/data/mswx/"+varnm+"_*/"+str(y)+"*.nc")
        # latitude bounds are given as (max, min), longitude as (min, max)
        da = da.sel(lon = slice(xn,xx), lat = slice(yx, yn))
        wrap_lon(da).rename(air_temperature = varnm).to_netcdf(new_fnm)

2023


In [8]:
# compile into single file: merge the yearly slices of the current variable (varnm) and save
mswx_all_years = xr.open_mfdataset("mswep/mswx_"+varnm+"_panama_*.nc")
mswx_all_years.to_netcdf("data/mswx_"+varnm+"_panama.nc")

---
## ERA5land

### Download

In [None]:
import os
import cdsapi

c = cdsapi.Client()

# every month / day / hour is requested; the strings match the zero-padded form used by the CDS request
all_months = [str(m).rjust(2, "0") for m in range(1, 13)]
all_days = [str(d).rjust(2, "0") for d in range(1, 32)]
all_hours = [str(h).rjust(2, "0") + ":00" for h in range(24)]

# make sure the download folder exists before the first request
os.makedirs("era5-land", exist_ok = True)

# one request (and one output file) per year, newest first; years already downloaded are skipped
for y in [str(y) for y in range(2023,1950,-1)]:
    
    new_fnm = 'era5-land/era5land_panama_tp_'+y+'.nc'
    if os.path.exists(new_fnm): continue
    print(new_fnm)
    
    # download to a temporary name and rename once complete, so that an interrupted
    # download is never mistaken for a finished file by the check above
    tmp_fnm = new_fnm + ".part"
    
    c.retrieve(
        'reanalysis-era5-land',
        {
            'variable': [
                'total_precipitation',
            ],
            'year': y,
            'month': all_months,
            'day': all_days,
            'time': all_hours,
            # bounding box for the request (CDS 'area' convention is north, west, south, east)
            'area': [
                9.7, -80.25, 8.7,
                -79.25,
            ],
            'format': 'netcdf',
        },
        tmp_fnm)
    
    os.replace(tmp_fnm, new_fnm)
        

### Compile

In [8]:
# open all yearly downloads as one dataset; taking the max over "expver" collapses that dimension
# so that a single total-precipitation (tp) series remains
era5_files = glob.glob("era5-land/*.nc")
tp_hourly = xr.open_mfdataset(era5_files).max("expver").tp

# aggregate the hourly values to daily totals and save
# NOTE(review): ERA5-Land hourly tp is documented as an accumulation since the start of the day rather
# than a per-hour amount - confirm that summing the hourly values gives the intended daily total
# NOTE(review): no unit conversion is applied here - confirm the units expected downstream
ds = tp_hourly.resample(time = "D").sum()
ds.to_netcdf("data/era5land_panama.nc")

---
# Station data

## Monthly precip from Panama Canal Authority sites

In [146]:
# create dictionary to match between 2023 data and monthly vertical summaries
# keys:   column names used in the monthly vertical spreadsheet (after the renaming applied when it is loaded)
# values: three-letter station codes used as column names in the 2023 file
stations = {
    "AGUACLARA": "ACL",
    "AGUASALUD": "ASA",
    "ALHAJUELA": "ALA",
    "ARCASONIA": "ARC",
    "AMADOR": "AMA",
    "BARBACOA": "BBQ",
    "BCI": "BCI",
    "BALBOAHTS": "BHT",
    "CHICOCABERCERA": "CAB",
    "CANONES": "CAN",
    "CASCADAS": "CAS",
    "CERROCAMA": "CCA",
    "COCOLI326": "CCL",
    "CANDELARIA": "CDL",
    "CHICO": "CHI",
    "CHAMON": "CHM",
    "CHORRO": "CHR",
    "CANO": "CNO",
    "CHAGRECITO": "CTO",
    "COROZAL": "CZL",
    "DOSBOCAS": "DBK",
    "DIABLO": "DHT",
    "EMPIREHILL": "EMH",
    "ESCANDALOSA": "ESC",
    "ESPERANZA": "EZA",
    "BALBOAFAA": "FAA",
    "CIENTO": "CNT",
    "FRIJOLITO": "FTO",
    "GASPARILLAL": "GAD",
    "GAMBOA": "GAM",
    "GATUN": "GAT",
    "GOLD HILL": "GOL",
    "GATUNWEST": "GTW",
    "GUACHA": "GUA",
    "HUMEDAD": "HUM",
    "ISLABRUJA": "IBC",
    "JAGUA": "JAG",
    "LIMONBAY": "LMB",
    "MIRAFLORES": "MIR",
    "MONTE LIRIO": "MLR",
    "PUNTABOHIO": "PBO",
    "PELUCA": "PEL",
    "PUNTAFRIJOLES": "PFR",
    "PEDROMIGUEL": "PMG",
    "RAICES": "RAI",
    "RIOPIEDRAS": "RPA",
    "SALAMANCA": "SAL",
    "SANTACLARA": "SCL",
    "SANMIGUEL": "SMG",
    "SANTAROSA": "SRO",
    "TRANQUILLA": "TRA",
    "VALLEGATUN": "VCG",
    "VISTAMARES": "VTM",
    "ZANGUENGA": "ZAN",
}

In [147]:
## 2023 DATA ##

# load station names: the first line of the CSV is consumed as the header, so row 0 of df_head
# is the second line of the file; its values become the column names of the data table
df_head = pd.read_csv("data/Precip-2023.csv", nrows=1)
df_head.iloc[0,0] = "timestamp"   # label the first column so that it can serve as the (parsed) date index

# load station data: skip the four header lines, index by timestamp and total the values per calendar month
# NOTE(review): .sum() returns 0 for a month in which a station has no valid values - confirm that
# missing records cannot be mistaken for dry months (min_count = 1 would give NaN instead)
pr_2023 = pd.read_csv("data/Precip-2023.csv", skiprows=4, parse_dates = [0], index_col = 0, names = df_head.loc[0].values).resample("MS").sum()

In [148]:
## PRIOR TO 2023 ##

# load the sheet without a header; the first three rows together form the column labels
raw_sheet = pd.read_excel("data/Monthly_Rain_ACP_Vertical.xlsx", sheet_name = "Rain.All", header = None)
header_rows = raw_sheet.iloc[:3]

# build one label per column by joining its three header cells (empty cells read as "nan" and are blanked)
# NOTE(review): the substitution removes "nan" anywhere in a cell, so a name containing those letters would be altered
raw_sheet.columns = [
    " ".join(re.sub("nan", "", str(cell)) for cell in header_rows[col]).strip()
    for col in header_rows.columns
]

# drop the header rows, then any row without a year
acp_pr = raw_sheet.iloc[3:]
acp_pr = acp_pr.loc[acp_pr.Year.notna()]

# index by date (first day of the month)
acp_pr.index = [
    datetime.strptime("{}-{}".format(int(row["Year"]), int(row["Month"])), "%Y-%m")
    for _, row in acp_pr.iterrows()
]

# keep only the station columns (drop the date columns and the summary columns)
non_station_cols = ["Year", "Month", "Yr.Mon", "Average", "1-yr", "3-yr", "5-year Running Average"]
acp_pr = acp_pr[[c for c in acp_pr.columns if c not in non_station_cols]]

# modify column names to match station data
# NOTE(review): "COCOLI" and "SANPEDRO" are not keys of the `stations` dictionary (which has "COCOLI326"),
# so these two columns remain unmatched - confirm this is intended
column_fixes = {
    "CAÑO" : "CANO",
    "HODGESHILL GOLD HILL" : "GOLD HILL",
    "CRISTOBAL COCOSOLO LIMONBAY" : "LIMONBAY",
    "MONTELIRIO" : "MONTE LIRIO",
    "SAN PEDRO" : "SANPEDRO",
    "VALLECENTRALGATUN" : "VALLEGATUN",
    "CERROCOCOLI" : "COCOLI",
    "PEDROMIGEL" : "PEDROMIGUEL",
}
acp_pr = acp_pr.rename(columns = column_fixes)

In [149]:
# stations in monthly vertical not matched in 2023 file
# (columns of the historical table that have no entry in the `stations` lookup)
[col for col in acp_pr.columns if col not in stations]

['AGUABUENA',
 'BALBOADOCKS',
 'BATATILLA',
 'BATEALES',
 'BOCATUCUE',
 'COCOLI',
 'CHISNA',
 'COCLENORTE',
 'FTS',
 'GALETA',
 'GATUNCENTRAL',
 'INDIOCHORROS',
 'LIMPIO',
 'LOSDARIELES',
 'LOSHULES',
 'MARIAS',
 'NUEVABORINQUEN',
 'PALMARAZO',
 'SANPEDRO',
 'SANVICENTE']

In [150]:
# stations in 2023 file not matched in monthly vertical data
# (columns of `station_names`, skipping the first, whose code is not a value of the `stations` lookup)
# NOTE(review): `station_names` is not defined in any cell shown in this notebook - it appears to come from
# a cell that was removed or renamed, so this cell will fail on a fresh kernel; confirm how it should be loaded
station_names[[c for c in station_names.columns[1:] if not c in stations.values()]]

Unnamed: 0,BUA,CUL
0,Buenos Aires,Culebra


In [164]:
# rename stations as station codes & merge with 2023 data
acp_pr = pd.concat([acp_pr.rename(columns = stations), pr_2023])

# Keep a single row per timestamp, preferring the row added last (the 2023 file).
# Without this, re-running the cell appends the 2023 rows a second time, because `acp_pr`
# is overwritten with the merged table.
acp_pr = acp_pr.loc[~acp_pr.index.duplicated(keep = "last")]

# keep only the columns that correspond to a known station code, then save
acp_pr = acp_pr[[c for c in acp_pr.columns if c in stations.values()]]
acp_pr.to_csv("data/pr_monthly_acp.csv")

## Compile station data

In [174]:
# station metadata from the "Locations" sheet, indexed by the whitespace-stripped STRI name
acp_stations = pd.read_excel("data/Monthly_Rain_ACP_Vertical.xlsx", sheet_name = "Locations", skiprows = 1)
acp_stations.index = [str(name).strip() for name in acp_stations["STRI Name"]]

# label stations as active/inactive: every row from the "Deactivated Stations" divider downwards is
# marked as deactivated (STATUS was just appended, so it is the last column)
acp_stations["STATUS"] = "Active"
divider_row = acp_stations["ACP Name"].tolist().index("Deactivated Stations")
acp_stations.iloc[divider_row:, -1] = "Deactivated"

# clean rows: drop rows without an ACP name as well as the divider row itself
has_acp_name = acp_stations["ACP Name"].notna()
is_divider = acp_stations["ACP Name"] == "Deactivated Stations"
acp_stations = acp_stations.loc[has_acp_name & ~is_divider]

# remove duplicate stations: these two names are listed as both active and deactivated; keep the active entry
deactivated_duplicate = acp_stations.index.isin(["PEDROMIGUEL", "RIOPIEDRAS"]) & (acp_stations.STATUS == "Deactivated")
acp_stations = acp_stations.loc[~deactivated_duplicate]

# relabel lat & lon
acp_stations = acp_stations.rename(columns = {"LAT (N).1" : "lat", "LONG (W).1" : "lon"})

acp_stations.to_csv("data/acp_stations_all.csv")

In [177]:
# select the stations that appear in the `stations` lookup & save details
# (a boolean mask keeps each matching row exactly once, in table order, even if a name were repeated;
#  the index is then replaced by the station code - one rename is sufficient)
# NOTE: this cell overwrites `acp_stations`; re-running it without re-running the previous cell
# filters the already-renamed codes against the long names and drops almost every row
acp_stations = acp_stations.loc[acp_stations.index.isin(list(stations.keys()))].rename(index = stations)

acp_stations[["ACP Name", "STRI Name", "ELE (m)", "lat", "lon"]].to_csv("data/acp_stations.csv")

---
# Climate models - extract data

## HighResMIP

### Extract subset of data

In [2]:
# list & filter models
# location of the downloaded HighResMIP data, and folder for the regional subsets cut from it
synda_path = "/rds/general/project/wwa/ephemeral/synda_clair/data/HighResMIP/"
out_path = "/rds/general/user/cb2714/home/01_wwa/24-01_Panama-drought/highresmip/tmp/"

# per-variable list of model folders to process (filled in by the filtering cell)
mdl_list = {"pr" : [], "tasmin" : [], "tasmax" : []}

In [6]:
# Decide, per model folder and variable, whether the available files cover the required period.
# Folders are visited in sorted order so that the resulting lists are reproducible.
for fp in sorted(glob.glob(synda_path+"*/r1i1*")):
    print("_".join(fp.split("/")[-2:]))
    
    for varnm in list(mdl_list.keys()):
        
        fl = sorted(glob.glob(fp+"/"+varnm+"_*.nc"))
        print("  "+varnm+" ("+str(len(fl))+") ", end = "")
        
        if len(fl) == 0: 
            print("No data")
            continue
            
        # first & last year covered, read from fixed positions at the end of each file name
        # (assumes names ending in YYYYMMDD-YYYYMMDD.nc - TODO confirm for all models)
        first_year = min([fnm[-20:-16] for fnm in fl])
        last_year = max([fnm[-11:-7] for fnm in fl])
        
        # filter to remove any runs that don't cover the required period
        if first_year > "1980" or last_year < "2023": 
            print(first_year+"-"+last_year+" only")
            continue
            
        print("KEEP")
        # only add each folder once, so that re-running this cell does not create duplicate entries
        if fp not in mdl_list[varnm]: mdl_list[varnm].append(fp)
      
    print("")
clear_output(wait = False)
print("Done.")

Done.


In [7]:
# extract subset of data for easier handling
# For every retained model folder, each source file is cut down to the larger study box and saved
# under out_path; files that were already written are skipped, so the cell can be resumed.
for varnm in mdl_list.keys():
    ml = mdl_list[varnm]
    
    for i, fp in enumerate(ml, start = 1):
        print(fp.split("/")[-2] + " ("+str(i)+"/"+str(len(ml))+")")
        
        fl = sorted(glob.glob(fp+"/"+varnm+"_*.nc"))
        print("  "+varnm+" ("+str(len(fl))+") ", end = "")
        
        for fnm in fl:
        
            # output name = source file name without the "-present" / "-future" tags
            new_fnm = out_path + re.sub("-present", "", re.sub("-future", "", fnm.split("/")[-1]))
            if os.path.exists(new_fnm): continue
                
            # if not already done, load the data & cut out the required region
            # (the source file is closed again as soon as the subset has been written)
            with xr.open_dataset(fnm) as src:
                da = wrap_lon(convert_units_to(src[varnm].reset_coords(drop = True), units[varnm]))
                
                # fix dimension names if necessary
                # NOTE(review): this happens after wrap_lon - confirm wrap_lon copes with "longitude"
                if "longitude" in da.coords: da = da.rename(longitude = "lon", latitude = "lat")
                
                # save daily data over slightly larger region:
                # mask of grid points inside the box, trimmed to the rows/columns containing at least one point
                box = np.logical_and(np.logical_and(da.lon >= Xn, da.lon <= Xx), np.logical_and(da.lat >= Yn, da.lat <= Yx))
                box = box.where(box == 1).dropna("lon", how = "all").dropna("lat", how = "all")
                sp = da.sel({"lon" : box.lon, "lat" : box.lat})
                sp.to_netcdf(new_fnm)
            
            print(".", end = "")

        print("")
    clear_output(wait = False)
print("Done.")

Done.


### Compile into single dataset

---
## CMIP6

Downloaded via synda:
`synda search project=CMIP6 variant_label=r1i1p1f1 experiment_id='ssp585,historical' frequency=day variable='pr,tasmin,tasmax'  local_path_drs_template=CMIP6/%(source_id)s/%(variable)s`

---
## CORDEX (SAM)
_Downloaded via synda_

In [14]:
# root folder of the synda downloads (searched for model folders in the next cell)
synda_path = "/rds/general/project/wwa/ephemeral/synda_clair/data/"
# folder that receives the regional subsets cut from the CORDEX files
out_path = "/rds/general/user/cb2714/home/01_wwa/24-01_Panama-drought/cordex/tmp/"

In [58]:
# filter out any models that don't have both historical & rcp85
# candidate folders: everything three levels below synda_path that does not belong to the other projects
other_projects = ("HighResMIP", "CMIP6")
model_list = [m for m in sorted(glob.glob(synda_path+"*/*/*")) if all(tag not in m for tag in other_projects)]

# per variable, keep the folders that hold at least one historical and one rcp85 file
mdl_list = {"pr" : [], "tasmin" : [], "tasmax" : []}
for fp in model_list:
    for varnm in mdl_list:
        has_hist = len(glob.glob(fp+"/"+varnm+"/*hist*.nc")) > 0
        has_rcp = len(glob.glob(fp+"/"+varnm+"/*rcp85*.nc")) > 0
        
        if has_hist and has_rcp:
            mdl_list[varnm].append(fp)

In [77]:
# short names for the driving GCMs: folder name (institute-model) -> model name used in output file names
abridge_gcm = {
    "CCCma-CanESM2" : "CanESM2",
    "CNRM-CERFACS-CNRM-CM5" : "CNRM-CM5",
    "CSIRO-QCCCE-CSIRO-Mk3-6-0" : "CSIRO-Mk3-6-0",
    "ICHEC-EC-EARTH" : "EC-EARTH",
    "IPSL-IPSL-CM5A-LR" : "IPSL-CM5A-LR",
    "IPSL-IPSL-CM5A-MR" : "IPSL-CM5A-MR",
    "MIROC-MIROC5" : "MIROC5",
    "MOHC-HadGEM2-ES" : "HadGEM2-ES",
    "MPI-M-MPI-ESM-LR" : "MPI-ESM-LR",
    "MPI-M-MPI-ESM-MR" : "MPI-ESM-MR",
    "NCC-NorESM1-M" : "NorESM1-M",
    "NOAA-GFDL-GFDL-ESM2G" : "GFDL-ESM2G",
    "NOAA-GFDL-GFDL-ESM2M" : "GFDL-ESM2M",
}

In [104]:
# extract subset of data for easier handling
# For every retained model folder, each source file is cut down to the larger study box and saved
# under out_path; files that were already written are skipped, so the cell can be resumed.
for varnm in mdl_list.keys():
    ml = mdl_list[varnm]
    
    for i, fp in enumerate(ml, start = 1):
        # model label: abbreviated GCM name plus the last two folder names of the path
        mdl = "_".join([abridge_gcm[fp.split("/")[-3]]] + fp.split("/")[-2:])
        print(mdl + " ("+str(i)+"/"+str(len(ml))+")")
        
        # something wrong with time bounds in this one, can't open the files
        if mdl in ['HadGEM2-ES_r1i1p1_RegCM4-3']: continue
        
        fl = sorted(glob.glob(fp+"/"+varnm+"/"+varnm+"_*.nc"))
        print("  "+varnm+" ("+str(len(fl))+") ", end = "")
        
        for fnm in fl:
            # one progress dot per file, whether it is processed or skipped
            print(".", end = "")
            
            # Build the output name from the file name only: its second "_"-separated token
            # (e.g. SAM-44) and its last token (date range + extension). Splitting the base name
            # rather than the full path keeps this independent of underscores in folder names.
            fnm_parts = os.path.basename(fnm).split("_")
            new_fnm = out_path+"_".join([varnm, fnm_parts[1], mdl, fnm_parts[-1]])
            
            # skip if file has already been processed
            if os.path.exists(new_fnm): continue
            
            # load data, convert to correct units (the source file is closed once the subset is written)
            with xr.open_dataset(fnm) as src:
                da = convert_units_to(src[varnm], units[varnm])
                
                # clean up unwanted extra coordinates (reset_coords returns a new object, so reassign)
                if "height" in da.coords: da = da.reset_coords("height", drop = True)
                
                # identify primary coordinates
                if "rlon" in da.dims:
                    xdim, ydim = ["rlon", "rlat"]
                elif "x" in da.dims:
                    xdim, ydim = ["x", "y"]
                else:
                    # unknown grid layout: report the dimensions and leave this file unprocessed
                    print(da.dims)
                    continue
                
                # cut out smaller region & save as temp file:
                # mask of grid points inside the box, trimmed to the rows/columns containing at least one point
                box = np.logical_and(np.logical_and(da.lon >= Xn, da.lon <= Xx), np.logical_and(da.lat >= Yn, da.lat <= Yx))
                box = box.where(box == 1).dropna(xdim, how = "all").dropna(ydim, how = "all")
                da = da.sel({xdim : box[xdim], ydim : box[ydim]})
                da.to_netcdf(new_fnm)
            
        print("")
    clear_output(wait = False)
print("Done.")

Done.


---
## CORDEX (CAM)
_Downloaded via synda_

In [2]:
# root folder of the synda downloads (searched for CAM-domain model folders in the next cell)
synda_path = "/rds/general/project/wwa/ephemeral/synda_clair/data/"
# folder that receives the regional subsets cut from the CORDEX files
out_path = "/rds/general/user/cb2714/home/01_wwa/24-01_Panama-drought/cordex/tmp/"

In [12]:
# filter out any models that don't have both historical & rcp85
# candidate folders: everything four levels below synda_path whose path contains "CAM-"
model_list = [m for m in sorted(glob.glob(synda_path+"*/*/*/*")) if "CAM-" in m]

# per variable, keep the folders that hold at least one historical and one rcp85 file
mdl_list = {"pr" : [], "tasmin" : [], "tasmax" : []}
for fp in model_list:
    for varnm in mdl_list:
        has_hist = len(glob.glob(fp+"/"+varnm+"/*hist*.nc")) > 0
        has_rcp = len(glob.glob(fp+"/"+varnm+"/*rcp85*.nc")) > 0
        
        if has_hist and has_rcp:
            mdl_list[varnm].append(fp)

In [17]:
# short names for the driving GCMs: folder name (institute-model) -> model name used in output file names
abridge_gcm = {
    "CCCma-CanESM2" : "CanESM2",
    "CNRM-CERFACS-CNRM-CM5" : "CNRM-CM5",
    "CSIRO-QCCCE-CSIRO-Mk3-6-0" : "CSIRO-Mk3-6-0",
    "ICHEC-EC-EARTH" : "EC-EARTH",
    "IPSL-IPSL-CM5A-LR" : "IPSL-CM5A-LR",
    "IPSL-IPSL-CM5A-MR" : "IPSL-CM5A-MR",
    "MIROC-MIROC5" : "MIROC5",
    "MOHC-HadGEM2-ES" : "HadGEM2-ES",
    "MPI-M-MPI-ESM-LR" : "MPI-ESM-LR",
    "MPI-M-MPI-ESM-MR" : "MPI-ESM-MR",
    "NCC-NorESM1-M" : "NorESM1-M",
    "NOAA-GFDL-GFDL-ESM2G" : "GFDL-ESM2G",
    "NOAA-GFDL-GFDL-ESM2M" : "GFDL-ESM2M",
}

In [20]:
# extract subset of data for easier handling
# For every retained model folder, each source file is cut down to the larger study box and saved
# under out_path; files that were already written are skipped, so the cell can be resumed.
for varnm in mdl_list.keys():
    ml = mdl_list[varnm]
    
    for i, fp in enumerate(ml, start = 1):
        # model label: abbreviated GCM name plus the last two folder names of the path
        mdl = "_".join([abridge_gcm[fp.split("/")[-3]]] + fp.split("/")[-2:])
        print(mdl + " ("+str(i)+"/"+str(len(ml))+")")
        
        # something wrong with time bounds in this one, can't open the files
        if mdl in ['HadGEM2-ES_r1i1p1_RegCM4-3', 'MPI-ESM-MR_r1i1p1_RegCM4-3']: continue
        
        fl = sorted(glob.glob(fp+"/"+varnm+"/"+varnm+"_*.nc"))
        print("  "+varnm+" ("+str(len(fl))+") ", end = "")
        
        for fnm in fl:
            # one progress dot per file, whether it is processed or skipped
            print(".", end = "")
            
            # Build the output name from the file name only: its second "_"-separated token
            # (e.g. CAM-44) and its last token (date range + extension). Splitting the base name
            # rather than the full path keeps this independent of underscores in folder names.
            fnm_parts = os.path.basename(fnm).split("_")
            new_fnm = out_path+"_".join([varnm, fnm_parts[1], mdl, fnm_parts[-1]])
            
            # skip if file has already been processed
            if os.path.exists(new_fnm): continue
            
            # load data, convert to correct units (the source file is closed once the subset is written)
            with xr.open_dataset(fnm) as src:
                da = convert_units_to(src[varnm], units[varnm])
                
                # clean up unwanted extra coordinates (reset_coords returns a new object, so reassign)
                if "height" in da.coords: da = da.reset_coords("height", drop = True)
                
                # identify primary coordinates
                if "rlon" in da.dims:
                    xdim, ydim = ["rlon", "rlat"]
                elif "x" in da.dims:
                    xdim, ydim = ["x", "y"]
                else:
                    # unknown grid layout: report the dimensions and leave this file unprocessed
                    print(da.dims)
                    continue
                
                # cut out smaller region & save as temp file:
                # mask of grid points inside the box, trimmed to the rows/columns containing at least one point
                box = np.logical_and(np.logical_and(da.lon >= Xn, da.lon <= Xx), np.logical_and(da.lat >= Yn, da.lat <= Yx))
                box = box.where(box == 1).dropna(xdim, how = "all").dropna(ydim, how = "all")
                da = da.sel({xdim : box[xdim], ydim : box[ydim]})
                da.to_netcdf(new_fnm)
            
        print("")
    clear_output(wait = False)
print("Done.")

Done.


### Compile

In [3]:
# list the extracted precipitation subsets (SAM and CAM domains share the same tmp folder; order is as returned by glob, i.e. unsorted)
glob.glob("cordex/tmp/pr*.nc")

['cordex/tmp/pr_CAM-44_IPSL-CM5A-MR_r1i1p1_RCA4_19510101-19551231.nc',
 'cordex/tmp/pr_CAM-44_CanESM2_r1i1p1_RCA4_20310101-20351231.nc',
 'cordex/tmp/pr_CAM-44_NorESM1-M_r1i1p1_RCA4_20360101-20401231.nc',
 'cordex/tmp/pr_CAM-22_GFDL-ESM2M_r1i1p1_RegCM4-7_20370101-20371231.nc',
 'cordex/tmp/pr_SAM-44_HadGEM2-ES_r1i1p1_RCA4_19610101-19651230.nc',
 'cordex/tmp/pr_SAM-22_MPI-ESM-MR_r1i1p1_RegCM4-7_20160101-20161231.nc',
 'cordex/tmp/pr_CAM-22_CNRM-CM5_r1i1p1_CRCM5_19610101-19651231.nc',
 'cordex/tmp/pr_SAM-22_MPI-ESM-MR_r1i1p1_RegCM4-7_20020101-20021231.nc',
 'cordex/tmp/pr_SAM-44_MIROC5_r1i1p1_RCA4_19510101-19551231.nc',
 'cordex/tmp/pr_CAM-44_IPSL-CM5A-MR_r1i1p1_RCA4_19860101-19901231.nc',
 'cordex/tmp/pr_CAM-22_HadGEM2-ES_r1i1p1_RegCM4-7_20480101-20481230.nc',
 'cordex/tmp/pr_CAM-22_GFDL-ESM2M_r1i1p1_RegCM4-7_19850101-19851231.nc',
 'cordex/tmp/pr_CAM-44_NorESM1-M_r1i1p1_RCA4_19910101-19951231.nc',
 'cordex/tmp/pr_CAM-22_MPI-ESM-LR_r1i1p1_REMO2015_19910101-19951231.nc',
 'cordex/tmp/pr_