#### Utility Functions

In [2]:
import numpy as np
import netCDF4 as nc
from netCDF4 import Dataset
import os
from utilities import *

In [129]:
# TESTING: disabled metadata dump. Flip the guard to True to print the global
# attributes, dimension sizes and selected variable metadata of `source_file`.
# NOTE(review): `source_file` is not defined in the visible cells -- confirm it
# exists before enabling this block.
if False:
    with Dataset(source_file, 'r') as src:
        # Global (file-level) attributes.
        for attr_name in src.ncattrs():
            print(f"{attr_name}: {src.getncattr(attr_name)}")

        # Dimension sizes; the 'expver' dimension is deliberately left out.
        for name, dimension in src.dimensions.items():
            if name == 'expver':
                continue
            size = 'unlimited' if dimension.isunlimited() else len(dimension)
            print(f"{name}: {size}")

        # Type, dimensions and attributes of the coordinate and wind variables.
        wanted_variables = ['time', 'latitude', 'longitude', 'u', 'v']
        for name, variable in src.variables.items():
            if name not in wanted_variables:
                continue
            print(f"{name}: {variable.datatype}, {variable.dimensions}")
            for attr_name in variable.ncattrs():
                print(f"  {attr_name}: {variable.getncattr(attr_name)}")

Conventions: CF-1.6
history: 2024-07-12 00:20:23 GMT by grib_to_netcdf-2.28.1: /opt/ecmwf/mars-client/bin/grib_to_netcdf -S param -o /cache/data8/adaptor.mars.internal-1720743622.4797337-1546-7-491f5957-9e6d-4a44-b45a-e3e27d89a58e.nc /cache/tmp/491f5957-9e6d-4a44-b45a-e3e27d89a58e-adaptor.mars.internal-1720743617.0208066-1546-3-tmp.grib
longitude: 1440
latitude: 721
time: 18
longitude: float32, ('longitude',)
  units: degrees_east
  long_name: longitude
latitude: float32, ('latitude',)
  units: degrees_north
  long_name: latitude
time: int32, ('time',)
  units: hours since 1900-01-01 00:00:00.0
  long_name: time
  calendar: gregorian
u: int16, ('time', 'expver', 'latitude', 'longitude')
  scale_factor: 0.0006413783645530925
  add_offset: -0.1487353009986828
  _FillValue: -32767
  missing_value: -32767
  units: m s**-1
  long_name: U component of wind
  standard_name: eastward_wind
v: int16, ('time', 'expver', 'latitude', 'longitude')
  scale_factor: 0.0006212465854009293
  add_offset: 

### Changing format of other files

In [4]:
# Convert the NetCDF3 files listed below to NetCDF4.
# ERA5_files and variables are paired by position: variables[i] is the variable
# name handed to the converter together with ERA5_files[i].
folder_path = './data/'
ERA5_files = [
    'ERA5_SST_1940-2024.nc',
]
variables = [
    'sst',
]

for file, var in zip(ERA5_files, variables):
    # Output name = input name tagged with '_converted'.
    final_file = file.replace('.nc', '_converted.nc')
    # convert_to_netcdf4 is not defined in this notebook; presumably it comes
    # from the `utilities` wildcard import -- TODO confirm.
    convert_to_netcdf4(folder_path, file, final_file, var)

Data shape:  (1015, 721, 1440)
Converted ERA5_SST_1940-2024.nc to ERA5_SST_1940-2024_converted.nc


In [6]:
# Confirm everything is correct: print the summary of each converted file and
# the shape of its variable.
for file, var in zip(ERA5_files, variables):
    converted_path = folder_path + file.replace('.nc', '_converted.nc')
    # Use a context manager so the file is closed even if a print below raises
    # (for example a KeyError when `var` is missing from the file).
    with Dataset(converted_path, 'r') as data:
        print(data)
        print(data.variables[var].shape)

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    Conventions: CF-1.6
    history: 2024-08-22 23:46:44 GMT by grib_to_netcdf-2.28.1: /opt/ecmwf/mars-client/bin/grib_to_netcdf -S param -o /cache/data9/adaptor.mars.internal-1724370372.7910786-23845-12-222df68d-3e3b-4111-9aec-312e5d5b17fd.nc /cache/tmp/222df68d-3e3b-4111-9aec-312e5d5b17fd-adaptor.mars.internal-1724370271.2287602-23845-7-tmp.grib
    dimensions(sizes): longitude(1440), latitude(721), time(1015)
    variables(dimensions): float32 longitude(longitude), float32 latitude(latitude), int32 time(time), int16 sst(time, latitude, longitude)
    groups: 
(1015, 721, 1440)


In [7]:
# Detrend the converted SST file with the CDO command-line tool (`cdo detrend`).
# The input is left untouched; the result is written to a new
# '*_converted_detrend.nc' file in ./data/.
# NOTE(review): the paths are hard-coded here rather than built from
# `folder_path` / `ERA5_files`, so this line must be edited by hand if the
# file list above changes.
!cdo detrend ./data/ERA5_SST_1940-2024_converted.nc ./data/ERA5_SST_1940-2024_converted_detrend.nc

cdo    detrend:                        1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9 9 9 910[32mcdo    detrend: [0mProcessed 1053813600 values from 1 variable over 1015 timesteps [10.88s 6943MB]


In [9]:
# Confirm everything is correct in the detrended files: print the summary of
# each file and the shape of its variable.
for file, var in zip(ERA5_files, variables):
    detrended_path = folder_path + file.replace('.nc', '_converted_detrend.nc')
    # Use a context manager so the file is closed even if a print below raises
    # (for example a KeyError when `var` is missing from the file).
    with Dataset(detrended_path, 'r') as data:
        print(data)
        print(data.variables[var].shape)

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    CDI: Climate Data Interface version 2.4.2 (https://mpimet.mpg.de/cdi)
    Conventions: CF-1.6
    history: Fri Aug 23 01:17:35 2024: cdo detrend ./data/ERA5_SST_1940-2024_converted.nc ./data/ERA5_SST_1940-2024_converted_detrend.nc
2024-08-22 23:46:44 GMT by grib_to_netcdf-2.28.1: /opt/ecmwf/mars-client/bin/grib_to_netcdf -S param -o /cache/data9/adaptor.mars.internal-1724370372.7910786-23845-12-222df68d-3e3b-4111-9aec-312e5d5b17fd.nc /cache/tmp/222df68d-3e3b-4111-9aec-312e5d5b17fd-adaptor.mars.internal-1724370271.2287602-23845-7-tmp.grib
    CDO: Climate Data Operators version 2.4.2 (https://mpimet.mpg.de/cdo)
    dimensions(sizes): time(1015), longitude(1440), latitude(721)
    variables(dimensions): int32 time(time), float32 longitude(longitude), float32 latitude(latitude), float32 sst(time, latitude, longitude)
    groups: 
(1015, 721, 1440)


In [10]:
# Upload for final check

from utils_pca_fun import *
import pandas as pd
import numpy as np
from netCDF4 import Dataset
from geo_field_jakob import GeoField

# `datetime` is used below but is not imported explicitly in any visible cell;
# it presumably leaked in through a wildcard import. Import it explicitly so
# this cell does not depend on what those modules happen to export.
from datetime import datetime

# Container for the run configuration; only 'data_parameters' is filled here.
d = {}

# Input file(s) and the variable to read from each (paired by position).
# The commented-out entries are the alternative precipitation-rate setup.
ERA5_files = [
    'ERA5_SST_1940-2024_converted_detrend.nc',
    # 'PRATE_NCEP_NCAR_Reanalysis_1948-2024.nc'
    ]

variables = [
    'sst',
    # 'prate'
    ]

# Spatial window passed as 'slice_lat' / 'slice_lon' (values in degrees,
# matching the file's degrees_north / degrees_east coordinates).
lats = [-66, 48] # SST
# lats = [-20, 1] # PRATE

lons = [150, 360] # SST
# lons = [310, 325] # PRATE

# Keyword arguments for load_data. Their exact semantics live in load_data,
# which is not defined in this notebook (presumably from utils_pca_fun --
# TODO confirm); the notes below reflect what the loader's log output shows.
d['data_parameters'] = {
    'folder_name' : './data/',
    'load_filename' :  ERA5_files[0],
    'varname' : variables[0],

    'use_cdftime' : True,
    # Time window. In the logged run the last retained step was 2023-12-01,
    # so 'to_date' appears to be exclusive -- verify against load_data.
    'from_date' : datetime(1948, 1, 1),
    'to_date' : datetime(2024, 1, 1),
    # Logged as "anomalize means_variance with base period (1948, 2019)".
    'anomalize': 'means_variance',
    'anomalize_base' :  (1948, 2019),
    'slice_lat' : lats, # [-20, 1], #
    'slice_lon' : lons, # [310, 325], #
    'level' : None,
    'verbosity' : 2,
    }

# Load the field, then keep the data array and the date range for later cells.
geo_object = load_data(**d['data_parameters'])
data = geo_object.data()
start_end_date = [geo_object.start_date, geo_object.end_date]

INFO:root:funciona


Loading data:
	Original date range 1940-01-01 00:00:00 - 2024-07-01 00:00:00 
Original data shape (1015, 721, 1440)
	Slicing lon = [150, 360], lat = [-66, 48]
	anomalize means_variance with base period (1948, 2019)
Stard and end dates:  1940-01-01 00:00:00 2024-07-01 00:00:00
	Slicing from = 1948-01-01 00:00:00 to 2024-01-01 00:00:00
Stard and end dates after slicing:  1948-01-01 00:00:00 2023-12-01 00:00:00
