## Notebook to prepare the ORA-20C data and save the prepared datasets locally

The next step would be to create the datasets for lateral forcing.

In this notebook:
- The separate files for each year per variable are combined into one file
- The region is cropped to 32°N to 66°N and 24°W to 14°E (longitude -24° to 14°)
- The longitudes are converted from the 0° to 360° range to the -180° to 180° range



The ORA-20C data is:
- 1900 - 2010
- monthly
- 1.0° x 1.0°

The ORA-20C data was downloaded from the server ftp://ftp-icdc.cen.uni-hamburg.de (path: /ora20c/) between 15 and 21 March.


 


### All preparation steps executed separately for opa0

In [1]:
# Import necessary packages

import numpy as np
import xarray as xr

In [2]:
def import_ORA_20C(ensemble_member, var_name, data_root = '/Volumes/Iris 300 GB'):
    '''
    Function to import the individual variables of ORA_20C

    All files of one variable and one ensemble member are opened together
    and returned as a single dataset.

    Parameters
    ----------
    ensemble_member : str
        Name of the sub-folder of the ensemble member, e.g. 'opa0'.
    var_name : str
        Variable prefix of the file names, e.g. 'so', 'thetao', 'uo', 'vo' or 'zos'.
    data_root : str, optional
        Folder that contains one sub-folder per ensemble member. The default
        is the external volume the notebook was originally run against; pass
        another folder to run the notebook on a different machine.

    Returns
    -------
    xarray.Dataset
        The result of ``xr.open_mfdataset`` for all files matching the pattern.
    '''
    # Glob pattern, not a folder: the * matches the part of the file name
    # that differs between the separate files of one variable
    file_pattern = f'{data_root}/{ensemble_member}/{var_name}_ora20c_1m_*_grid_1x1.nc'

    return xr.open_mfdataset(file_pattern, parallel = True)

In [3]:
# Import the ORA-20C files of ensemble member opa0, one dataset per variable
so_opa0, thetao_opa0, uo_opa0, vo_opa0, zos_opa0 = [
    import_ORA_20C('opa0', var_name)
    for var_name in ('so', 'thetao', 'uo', 'vo', 'zos')
]

In [4]:
def change_longitude_values(data, sort = False):
    '''
    Function to change the longitude values from 0 to 360 into -180 to 180

    The new values are computed from the longitudes that are present in
    ``data`` instead of being taken from a fixed 360-element array, so the
    labels stay correct when the grid is not the global 1 degree grid that
    starts at 0.5.

    Parameters
    ----------
    data : xarray.Dataset or xarray.DataArray
        Data with a one-dimensional ``lon`` dimension coordinate in degrees.
    sort : bool, optional
        If True, the result is additionally sorted by increasing longitude.
        The default (False) keeps the original order of the grid points,
        i.e. the eastern values first, followed by the negative ones.

    Returns
    -------
    Same type as ``data``
        A new object whose ``lon`` coordinate lies in [-180, 180).
    '''

    lon_0_360 = np.asarray(data.lon, dtype = float)

    # Wrap into [-180, 180): values below 180 are unchanged, values from 180
    # onwards are shifted down by 360 (so exactly 180 becomes -180)
    lon_wrapped = (lon_0_360 + 180.0) % 360.0 - 180.0

    data_changed = data.assign_coords(lon = lon_wrapped) # Assign the values to the coordinate

    if sort:
        data_changed = data_changed.sortby('lon') # Sort longitudes increasing

    return data_changed

In [5]:
# Relabel the longitudes of every opa0 variable from 0..360 to -180..180
so_opa0, thetao_opa0, uo_opa0, vo_opa0, zos_opa0 = map(
    change_longitude_values,
    (so_opa0, thetao_opa0, uo_opa0, vo_opa0, zos_opa0),
)


In [6]:
def shrink_region(data, lat_min = 32, lat_max = 66, lon_min = -24, lon_max = 14):
    '''
    Function to shrink the spatial region of the data from global to the ROMS region
    (by default 32°N to 66°N and 24°W to 14°E)

    The grid points are selected by index instead of with
    ``where(..., drop = True)``: for a rectangular box this keeps the same
    points, but it does not mask the data first, so nothing is converted to
    float and variables without lat/lon dimensions are left untouched.

    Parameters
    ----------
    data : xarray.Dataset or xarray.DataArray
        Data with one-dimensional ``lat`` and ``lon`` dimension coordinates,
        with ``lon`` given in the -180 to 180 convention.
    lat_min, lat_max : float, optional
        Latitude bounds in degrees north. Both bounds are exclusive.
    lon_min, lon_max : float, optional
        Longitude bounds in degrees east. Both bounds are exclusive.

    Returns
    -------
    Same type as ``data``
        The grid points strictly inside the box, in their original order.
    '''

    lat = np.asarray(data.lat)
    lon = np.asarray(data.lon)

    # Integer positions of the grid points strictly inside the box; the
    # longitudes do not have to be sorted for this to work
    lat_idx = np.flatnonzero((lat > lat_min) & (lat < lat_max))
    lon_idx = np.flatnonzero((lon > lon_min) & (lon < lon_max))

    data_shr = data.isel(lat = lat_idx, lon = lon_idx) # Select region

    return data_shr

In [7]:
# Crop every opa0 variable from the global grid to the ROMS region
so_opa0, thetao_opa0, uo_opa0, vo_opa0, zos_opa0 = map(
    shrink_region,
    (so_opa0, thetao_opa0, uo_opa0, vo_opa0, zos_opa0),
)

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': Fa

In [8]:
def combine_variables(variables):
    '''
    Merge the given variables into a single xarray.Dataset

    variables: the objects to merge; they are passed unchanged to xr.merge
    '''

    merged = xr.merge(variables)

    return merged

In [9]:
# Merge the five prepared opa0 variables into one dataset
opa0 = combine_variables(
    [
        so_opa0.so,
        thetao_opa0.thetao,
        uo_opa0.uo,
        vo_opa0.vo,
        zos_opa0.zos,
    ]
)

In [10]:
def save_data(data, name, folder = '/Users/iriskeizer/Documents/ROMS/data/ORA20C'):
    '''
    Function that saves the resulting xarray.dataset in a local folder

    Parameters
    ----------
    data : xarray.Dataset
        The dataset to write; its ``to_netcdf`` method is used.
    name : str
        File name without extension; '.nc' is appended.
    folder : str, optional
        Folder the file is written to. The default is the folder the
        notebook originally wrote to; pass another folder to run the
        notebook on a different machine.
    '''

    data.to_netcdf(f'{folder}/{name}.nc')

In [None]:
# Write the merged opa0 dataset to the local data folder
save_data(opa0, name = 'opa0_prepared')

### All preparation steps executed using one function for the other ensemble members

In [31]:
def prepare_ensemble_member(ensemble_member):
    '''
    Function that prepares an ensemble member for further analysis.

    For each of the variables so, thetao, uo, vo and zos it:
    - imports the variable
    - changes the longitude values
    - shrinks the domain

    The prepared variables are then combined into one dataset, which is
    saved under the name '<ensemble_member>_prepared' and returned.
    '''

    def load_and_crop(var_name):
        # Same order of steps as in the opa0 cells: import, relabel, crop
        imported = import_ORA_20C(ensemble_member, var_name)
        relabelled = change_longitude_values(imported)
        cropped = shrink_region(relabelled)
        return cropped[var_name]

    prepared = [
        load_and_crop(var_name)
        for var_name in ('so', 'thetao', 'uo', 'vo', 'zos')
    ]

    merged = combine_variables(prepared)

    save_data(merged, ensemble_member + '_prepared')

    return merged
    
    

In [32]:
# Prepare, save and keep the dataset of ensemble member opa1
opa1 = prepare_ensemble_member(ensemble_member = 'opa1')

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': Fa

In [28]:
# NOTE(review): this looks like a leftover cell. Its execution count (28) is
# lower than that of the cells above (31, 32), and opa1 is already the merged
# dataset returned by prepare_ensemble_member, whereas combine_variables is
# otherwise called with a list of variables. Confirm and remove if unused.
data = combine_variables(opa1)

In [None]:
# Prepare, save and keep the dataset of ensemble member opa2
opa2 = prepare_ensemble_member(ensemble_member = 'opa2')

In [None]:
# Prepare, save and keep the dataset of ensemble member opa3
opa3 = prepare_ensemble_member(ensemble_member = 'opa3')

In [None]:
# Prepare, save and keep the dataset of ensemble member opa4
opa4 = prepare_ensemble_member(ensemble_member = 'opa4')

In [None]:
# Prepare, save and keep the dataset of ensemble member opa5
opa5 = prepare_ensemble_member(ensemble_member = 'opa5')

In [None]:
# Prepare, save and keep the dataset of ensemble member opa6
opa6 = prepare_ensemble_member(ensemble_member = 'opa6')