In [1]:
import netCDF4
import csv
import os
import hashlib
import logging
from subprocess import run as srun
from urllib.parse import urlparse
from itertools import product
import time
import numpy as np
import xarray as xr
import rioxarray
import geopandas as gpd
from shapely.geometry import Polygon
import subprocess

In [2]:
# Configure the root logger so that messages at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)

In [3]:
def download(uri, ofile, md5):
    """Fetch ``uri`` into ``ofile`` with curl and check its MD5 digest.

    Parameters
    ----------
    uri : str
        URL handed to ``curl``.
    ofile : str
        Path the response body is written to (``curl -o``).
    md5 : str
        Expected hexadecimal MD5 digest of the downloaded file.

    Returns
    -------
    None

    Raises
    ------
    subprocess.CalledProcessError
        If ``curl`` exits with a non-zero status (``check=True``).

    Notes
    -----
    A digest mismatch is only reported through ``logging.warning``; the file
    is left on disk and no exception is raised. A matching digest is reported
    through ``logging.info``.
    """
    srun(['curl', '-s', '-o', ofile, uri], capture_output=True, check=True)

    # Hash in fixed-size chunks so large files are never held in memory at
    # once, and use a context manager so the handle is always closed.
    digest = hashlib.md5()
    with open(ofile, 'rb') as fh:
        for chunk in iter(lambda: fh.read(1024 * 1024), b''):
            digest.update(chunk)
    md5dld = digest.hexdigest()

    if md5 != md5dld:
        logging.warning("%s != %s", md5, md5dld)
    else:
        # The "==" message only makes sense when the digests actually agree.
        logging.info("uri %s (%s == %s) : %s", uri, md5, md5dld, ofile)

**FIRST ATTEMPT: DOWNLOAD THE DATA (WHOLE RASTER)**

In [4]:
# years=[1985]

In [5]:
# models=['EC-Earth3_','TaiESM1']

In [6]:
# scenarios=['historical','ssp126','ssp370']

In [7]:
# variables=['pr','sfcWind']

In [8]:
# future_1=list(range(2025, 2046))

In [9]:
# future_2=list(range(2045, 2066))

In [10]:
# with open('gddp-cmip6-thredds-fileserver.csv') as index:
#     fobjects = csv.reader(index)
#     next(fobjects)
#     for objs in fobjects:
#         md5, uri = [o.strip() for o in objs]
#         prsout = urlparse(uri)
#         ofile = os.path.split(prsout.path)[1]

#         if (any(str(year) in ofile for year in years) and
#             any(model in ofile for model in models) and
#             any(variable in ofile for variable in variables) and
#             any(scenario in ofile for scenario in scenarios)):

#             print(f"Downloading: {ofile}")  # Debug print
#             download(uri, ofile, md5)

**DOWNLOAD RASTERS FOR THE URUGUAY REGION ONLY AND COMPUTE MONTHLY ACCUMULATED VALUES**

In [11]:
def filter_and_download(csv_path, years, models, variables, scenarios, shape,
                        out_dir="filtered_data"):
    """Download the indexed files that match the filters and keep only a clipped copy.

    The index CSV is read row by row (the first row is skipped as a header);
    every remaining row must hold exactly two fields: an MD5 digest and a URL.
    A file is selected when its name (last component of the URL path) contains
    at least one entry from each of ``years``, ``models``, ``variables`` and
    ``scenarios``. Matching is plain substring matching.

    Each selected file is downloaded into the current working directory with
    ``download``, opened with xarray, its longitudes are remapped with
    ``(lon + 180) % 360 - 180``, it is clipped to ``shape`` and written to
    ``<out_dir>/<name>_masked.nc``. The full download is then deleted.

    Parameters
    ----------
    csv_path : str
        Path to the index CSV.
    years : iterable
        Values whose ``str()`` is searched for in the file name.
    models, variables, scenarios : iterable of str
        Substrings searched for in the file name.
    shape : object exposing ``.geometry`` and ``.crs``
        Clip geometry, passed to ``rio.clip``.
    out_dir : str, optional
        Directory receiving the clipped files; created if missing.

    Returns
    -------
    None
    """
    secs_in_a_day = 24 * 60 * 60

    # to_netcdf does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)

    with open(csv_path, newline='') as index:
        fobjects = csv.reader(index)
        next(fobjects, None)  # Skip header (tolerate an empty index file)

        for objs in fobjects:
            if not objs:
                # Blank line in the CSV
                continue
            md5, uri = [o.strip() for o in objs]

            # Extract the filename from the URL path
            ofile = os.path.split(urlparse(uri).path)[1]

            # Process the file only if it matches every filtering criterion
            selected = (
                any(str(year) in ofile for year in years)
                and any(model in ofile for model in models)
                and any(variable in ofile for variable in variables)
                and any(scenario in ofile for scenario in scenarios)
            )
            if not selected:
                continue

            print(f"Downloading: {ofile}")  # Debug print

            # Download the full data file
            download(uri, ofile, md5)

            filename = os.path.splitext(ofile)[0]
            data = os.path.join(os.getcwd(), ofile)

            # The context manager closes the source file even if clipping or
            # writing fails, and guarantees it is closed before it is removed.
            with xr.open_dataset(data) as ds_raw:
                # Set spatial dims and CRS
                ds_GDDP6 = ds_raw.rio.set_spatial_dims(x_dim="lon", y_dim="lat")
                ds_GDDP6.rio.write_crs("epsg:4326", inplace=True)

                # Remap longitudes with (lon + 180) % 360 - 180 and restore
                # ascending order
                ds_GDDP6.coords['lon'] = (ds_GDDP6.coords['lon'] + 180) % 360 - 180
                ds_GDDP6 = ds_GDDP6.sortby(ds_GDDP6.lon)

                # sortby returns a new object, so the spatial dims are declared
                # again before clipping to the requested region
                ds_GDDP6 = ds_GDDP6.rio.set_spatial_dims(x_dim="lon", y_dim="lat")
                ds_masked = ds_GDDP6.rio.clip(shape.geometry, shape.crs, drop=True)

                # Test the dataset itself, not the requested variable list: a
                # request such as ['pr', 'sfcWind'] also selects files that
                # hold no 'pr' variable.
                if 'pr' in ds_masked.data_vars:
                    # 'pr' scaled by the number of seconds in a day, rounded
                    # to 2 decimals, stored as float32
                    ds_masked['pr_mmd'] = (
                        ds_masked['pr'] * secs_in_a_day
                    ).round(2).astype('float32')

                # Save the filtered data to a NetCDF file
                ds_masked.to_netcdf(os.path.join(out_dir, f"{filename}_masked.nc"))
                ds_masked.close()

            # Delete the original unfiltered file to save space
            os.remove(data)
            print(f"Deleted original unfiltered file: {ofile}")

In [12]:
# Read the zipped Uruguay shapefile with GeoPandas. The result is passed as
# `shape` to filter_and_download, which clips every raster with
# shape.geometry and shape.crs.
shape = gpd.read_file("ury_adm_2020_shp.zip")

  result = read_func(


In [19]:
# Filtering parameters. filter_and_download matches each list against the file
# names in the index CSV by substring; a file is kept when at least one entry
# from every list occurs in its name.
# range(2025, 2044) covers 2025..2043 because the end of a range is exclusive.
years = list(range(2025, 2044)) # alternatives: list(range(2055, 2065)), list(range(1985, 2015))
models= ['TaiESM1'] # alternative: ['EC-Earth3_']
variables = ['pr'] # alternatives: ['sfcWind'], ['tas_']
scenarios = ['ssp370'] # alternatives: ["historical"], ['ssp126']

In [20]:
# Run the process: for every file in the index CSV that matches the filters,
# download it, clip it to `shape`, save the clipped copy under filtered_data/
# and delete the full download.
filter_and_download('gddp-cmip6-thredds-fileserver.csv', years, models, variables, scenarios, shape)

Downloading: pr_day_TaiESM1_ssp370_r1i1p1f1_gn_2025.nc
Deleted original unfiltered file: pr_day_TaiESM1_ssp370_r1i1p1f1_gn_2025.nc
Downloading: pr_day_TaiESM1_ssp370_r1i1p1f1_gn_2030.nc
Deleted original unfiltered file: pr_day_TaiESM1_ssp370_r1i1p1f1_gn_2030.nc
Downloading: pr_day_TaiESM1_ssp370_r1i1p1f1_gn_2027.nc
Deleted original unfiltered file: pr_day_TaiESM1_ssp370_r1i1p1f1_gn_2027.nc
Downloading: pr_day_TaiESM1_ssp370_r1i1p1f1_gn_2031.nc
Deleted original unfiltered file: pr_day_TaiESM1_ssp370_r1i1p1f1_gn_2031.nc
Downloading: pr_day_TaiESM1_ssp370_r1i1p1f1_gn_2029.nc
Deleted original unfiltered file: pr_day_TaiESM1_ssp370_r1i1p1f1_gn_2029.nc
Downloading: pr_day_TaiESM1_ssp370_r1i1p1f1_gn_2026.nc
Deleted original unfiltered file: pr_day_TaiESM1_ssp370_r1i1p1f1_gn_2026.nc
Downloading: pr_day_TaiESM1_ssp370_r1i1p1f1_gn_2033.nc
Deleted original unfiltered file: pr_day_TaiESM1_ssp370_r1i1p1f1_gn_2033.nc
Downloading: pr_day_TaiESM1_ssp370_r1i1p1f1_gn_2028.nc
Deleted original unfiltered 

In [15]:
# def filter_and_download_2(csv_path, years, models, variables, scenarios, north, south, east, west):
#     with open(csv_path) as index:
#         fobjects = csv.reader(index)
#         next(fobjects)  # Skip header

#         # Filter and download each file that matches the criteria
#         for objs in fobjects:
#             md5, uri = [o.strip() for o in objs]

#             # Extract the filename from the URL path
#             prsout = urlparse(uri)
#             ofile = os.path.split(prsout.path)[1]

#             # Check if the file matches filtering criteria
#             if any(str(year) in ofile for year in years) and \
#                any(model in ofile for model in models) and \
#                any(variable in ofile for variable in variables) and \
#                any(scenario in ofile for scenario in scenarios):
                
#                 print(f"Downloading subset for: {ofile}")  # Debug print
                
#                 # Construct the NCSS request URL
#                 base_url = uri.replace("fileServer", "ncss/grid")
#                 subset_url = (
#                     f"{base_url}?var={variables[0]}&north={north}&south={south}"
#                     f"&east={east}&west={west}&horizStride=1"
#                     f"&time_start={years[0]}-01-01T00:00:00Z"
#                     f"&time_end={years[-1]}-12-31T23:59:59Z"
#                     f"&accept=netcdf3&addLatLon=true"
#                 )

#                 # Define output filename
#                 filename = os.path.splitext(os.path.basename(uri))[0] + "_subset.nc"

#                 # Use wget to download the subset
#                 subprocess.run(["wget", "-O", filename, subset_url])

#                 print(f"Downloaded: {filename}")

In [16]:
# south=-36
# north=-29
# east=-52
# west=-60

In [17]:
# # Set the filtering parameters
# years = '1985' #list(range(1985, 2015)) #list(range(2025, 2065)) #list(range(1985, 2015))
# models=['TaiESM1'] #['EC-Earth3_'] #
# variables = ['sfcWind'] #["pr"]
# scenarios = ["historical"] #['ssp370'] # #['ssp126']

In [18]:
# filter_and_download_2('gddp-cmip6-thredds-fileserver.csv', years, models, variables, scenarios, north, south, east, west)