In [1]:
%%capture
!pip install -U "xarray[complete]" netCDF4 


In [8]:
import pandas as pd
import boto3
import xarray as xr
import os
import zarr
import s3fs
from dask.diagnostics import ProgressBar
from datetime import datetime
from yaml import safe_load
from numcodecs import blosc
import logging
# Prefix every log record with a timestamp. No level is set here, so the root
# logger keeps its default threshold (WARNING); the cells below emit their
# progress messages via logging.warning.
logging.basicConfig(format='%(asctime)s %(message)s')

In [10]:
# Filesystem handle used for all S3 access in this notebook.
s3 = s3fs.S3FileSystem()

# NOTE: DATALAKE_CONF_PATH is hard-coded here for interactive runs and
# overwrites any value already present in the environment. Disable this
# assignment when the variable is supplied by the environment itself.
os.environ["DATALAKE_CONF_PATH"] = "/home/ec2-user/SageMaker/datalake/conf/data/configuration.yml"
confType = 'agera5ArgumentsSolarFlux'
ymlPath = os.environ["DATALAKE_CONF_PATH"]

# Parse the YAML configuration with safe_load; the context manager closes the
# file handle as soon as parsing is done instead of leaving it open.
with open(ymlPath, 'r') as ymlFile:
    conf_args = safe_load(ymlFile)
confFile = conf_args[confType]

# Mapping over the S3 location named by confFile['s3Raw']; this is the store
# the append loop further down writes to.
ageraS3 = s3fs.S3Map(root=confFile['s3Raw'], s3=s3, check=False)

logging.warning("Loading files to append")
# Names of the files found in the regridded-data directory.
ncToLoad = os.listdir(confFile['pathforReggrid'])

2024-05-06 14:23:17,023 Loading files to append


In [17]:
def convertRadiation(k):
    """Scale ``k`` down by a factor of one million.

    Args:
        k: A number, or any object supporting true division by an integer
            (the loop below applies this to every data variable of a dataset).
            NOTE(review): presumably a J -> MJ unit conversion; confirm
            against the data source.

    Returns:
        ``k / 1000000``.
    """
    return k / 1_000_000

def netcdfChange(path, file, variableName: str):
    """Open one NetCDF file and shape it for appending along ``time``.

    The date is parsed from the second-to-last underscore-separated token of
    ``file`` (format ``%Y%m%d``). The ``crs`` variable is dropped, ``Band1`` is
    renamed to ``variableName``, and the parsed date becomes a length-one
    ``time`` dimension.

    Args:
        path: Directory containing ``file``, with or without a trailing
            path separator.
        file: File name, e.g. ``..._AgERA5_20240101_final-v1.1.nc``.
        variableName: New name for the ``Band1`` data variable.

    Returns:
        The opened ``xarray.Dataset`` with the added ``time`` dimension.

    Raises:
        IndexError: If ``file`` contains no underscore.
        ValueError: If the date token does not match ``%Y%m%d``.
    """
    dateToken = file.split('_')[-2]
    timestamp = datetime.strptime(dateToken, '%Y%m%d')

    # os.path.join gives the same result as plain concatenation when `path`
    # ends with a separator, and still works when it does not.
    dataset = xr.open_dataset(os.path.join(path, file))
    dataset = dataset.drop_vars(['crs'])
    dataset = dataset.rename_vars(name_dict={'Band1': variableName})

    # Attach the date as a scalar coordinate, then promote it to a dimension.
    dataset = dataset.assign_coords(time=timestamp)
    return dataset.expand_dims(dim="time")

In [18]:
# Append every file in ncToLoad to the Zarr store on S3, one time step per
# file, in sorted file-name order (chronological as long as the names differ
# only in their YYYYMMDD token).
# NOTE: this cell is not idempotent - running it again appends the same time
# steps a second time (mode='a' together with append_dim).
varName = "Solar-Radiation-Flux"

# Blosc's thread count is a process-wide setting, so it is set once rather
# than on every iteration.
blosc.set_nthreads(8)

for _nc in sorted(ncToLoad):
    logging.warning("Opening %s ", _nc)
    nc = netcdfChange(confFile['pathforReggrid'], _nc, varName)

    logging.warning("Organizing dimensions")
    # Dataset.map is the current name of the deprecated Dataset.apply; it
    # calls convertRadiation on each data variable.
    nc = nc.map(convertRadiation)
    nc = nc[['time', 'lat', 'lon', varName]]

    # No `encoding` is passed: xarray rejects encoding for variables that
    # already exist in the store when appending, so compressor and chunking
    # come from the existing arrays.
    # NOTE(review): if the store does not exist yet, the first write uses the
    # library defaults - confirm that is intended.
    with ProgressBar():
        nc.to_zarr(ageraS3, mode='a', append_dim='time', consolidated=True)
    logging.warning("Insert finished")
logging.warning("Process completed")

2024-05-06 14:35:20,380 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240101_final-v1.1.nc 
2024-05-06 14:35:20,578 Organizing dimensions
2024-05-06 14:35:20,579 Insert finished
2024-05-06 14:35:20,580 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240102_final-v1.1.nc 
2024-05-06 14:35:20,772 Organizing dimensions
2024-05-06 14:35:20,773 Insert finished
2024-05-06 14:35:20,774 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240103_final-v1.1.nc 


Hellooooo
Hellooooo


2024-05-06 14:35:20,965 Organizing dimensions
2024-05-06 14:35:20,966 Insert finished
2024-05-06 14:35:20,967 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240104_final-v1.1.nc 
2024-05-06 14:35:21,158 Organizing dimensions
2024-05-06 14:35:21,159 Insert finished
2024-05-06 14:35:21,160 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240105_final-v1.1.nc 


Hellooooo
Hellooooo


2024-05-06 14:35:21,350 Organizing dimensions
2024-05-06 14:35:21,352 Insert finished
2024-05-06 14:35:21,352 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240106_final-v1.1.nc 
2024-05-06 14:35:21,543 Organizing dimensions
2024-05-06 14:35:21,544 Insert finished
2024-05-06 14:35:21,545 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240107_final-v1.1.nc 


Hellooooo
Hellooooo


2024-05-06 14:35:21,736 Organizing dimensions
2024-05-06 14:35:21,737 Insert finished
2024-05-06 14:35:21,738 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240108_final-v1.1.nc 
2024-05-06 14:35:21,930 Organizing dimensions
2024-05-06 14:35:21,931 Insert finished
2024-05-06 14:35:21,931 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240109_final-v1.1.nc 


Hellooooo
Hellooooo


2024-05-06 14:35:22,122 Organizing dimensions
2024-05-06 14:35:22,123 Insert finished
2024-05-06 14:35:22,124 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240110_final-v1.1.nc 
2024-05-06 14:35:22,314 Organizing dimensions
2024-05-06 14:35:22,315 Insert finished
2024-05-06 14:35:22,316 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240111_final-v1.1.nc 


Hellooooo
Hellooooo


2024-05-06 14:35:22,506 Organizing dimensions
2024-05-06 14:35:22,507 Insert finished
2024-05-06 14:35:22,507 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240112_final-v1.1.nc 
2024-05-06 14:35:22,697 Organizing dimensions
2024-05-06 14:35:22,698 Insert finished
2024-05-06 14:35:22,699 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240113_final-v1.1.nc 


Hellooooo
Hellooooo


2024-05-06 14:35:22,888 Organizing dimensions
2024-05-06 14:35:22,889 Insert finished
2024-05-06 14:35:22,890 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240114_final-v1.1.nc 
2024-05-06 14:35:23,080 Organizing dimensions
2024-05-06 14:35:23,080 Insert finished
2024-05-06 14:35:23,081 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240115_final-v1.1.nc 


Hellooooo
Hellooooo


2024-05-06 14:35:23,271 Organizing dimensions
2024-05-06 14:35:23,272 Insert finished
2024-05-06 14:35:23,273 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240116_final-v1.1.nc 
2024-05-06 14:35:23,463 Organizing dimensions
2024-05-06 14:35:23,464 Insert finished
2024-05-06 14:35:23,465 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240117_final-v1.1.nc 


Hellooooo
Hellooooo


2024-05-06 14:35:23,655 Organizing dimensions
2024-05-06 14:35:23,656 Insert finished
2024-05-06 14:35:23,657 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240118_final-v1.1.nc 
2024-05-06 14:35:23,847 Organizing dimensions
2024-05-06 14:35:23,848 Insert finished
2024-05-06 14:35:23,849 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240119_final-v1.1.nc 


Hellooooo
Hellooooo


2024-05-06 14:35:24,040 Organizing dimensions
2024-05-06 14:35:24,041 Insert finished
2024-05-06 14:35:24,041 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240120_final-v1.1.nc 
2024-05-06 14:35:24,233 Organizing dimensions
2024-05-06 14:35:24,234 Insert finished
2024-05-06 14:35:24,235 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240121_final-v1.1.nc 


Hellooooo
Hellooooo


2024-05-06 14:35:24,423 Organizing dimensions
2024-05-06 14:35:24,424 Insert finished
2024-05-06 14:35:24,425 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240122_final-v1.1.nc 
2024-05-06 14:35:24,614 Organizing dimensions
2024-05-06 14:35:24,615 Insert finished
2024-05-06 14:35:24,616 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240123_final-v1.1.nc 


Hellooooo
Hellooooo


2024-05-06 14:35:24,806 Organizing dimensions
2024-05-06 14:35:24,807 Insert finished
2024-05-06 14:35:24,808 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240124_final-v1.1.nc 
2024-05-06 14:35:24,998 Organizing dimensions
2024-05-06 14:35:24,999 Insert finished
2024-05-06 14:35:24,999 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240125_final-v1.1.nc 


Hellooooo
Hellooooo


2024-05-06 14:35:25,190 Organizing dimensions
2024-05-06 14:35:25,191 Insert finished
2024-05-06 14:35:25,192 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240126_final-v1.1.nc 
2024-05-06 14:35:25,384 Organizing dimensions
2024-05-06 14:35:25,385 Insert finished
2024-05-06 14:35:25,385 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240127_final-v1.1.nc 


Hellooooo
Hellooooo


2024-05-06 14:35:25,576 Organizing dimensions
2024-05-06 14:35:25,577 Insert finished
2024-05-06 14:35:25,578 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240128_final-v1.1.nc 
2024-05-06 14:35:25,769 Organizing dimensions
2024-05-06 14:35:25,770 Insert finished
2024-05-06 14:35:25,771 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240129_final-v1.1.nc 


Hellooooo
Hellooooo


2024-05-06 14:35:25,962 Organizing dimensions
2024-05-06 14:35:25,963 Insert finished
2024-05-06 14:35:25,964 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240130_final-v1.1.nc 
2024-05-06 14:35:26,155 Organizing dimensions
2024-05-06 14:35:26,156 Insert finished
2024-05-06 14:35:26,156 Opening Solar-Radiation-Flux_C3S-glob-agric_AgERA5_20240131_final-v1.1.nc 


Hellooooo
Hellooooo


2024-05-06 14:35:26,347 Organizing dimensions
2024-05-06 14:35:26,348 Insert finished
2024-05-06 14:35:26,348 Process completed


Hellooooo


In [19]:
# Display the dataset left over from the last iteration of the loop above.
nc