# Compute monthly means and store to file
## Force a 50\% data basis to compute a mean value

In [1]:
import os
import glob 

import numpy as np
import xarray as xr

from sclouds.helpers   import path_input, VARIABLES
from sclouds.io.filter import Filter

%matplotlib inline



In [2]:
def get_all_filesnames_from_one_variable(var):
    """Get all filenames from one variable."""
    return glob.glob(os.path.join(path_input, '*{}*.nc'.format(var)))

In [3]:
t2m_files = get_all_filesnames_from_one_variable('t2m')

In [4]:
t2m_files[:2]

['/home/hanna/lagrings/ERA5_monthly/2012_01_t2m.nc',
 '/home/hanna/lagrings/ERA5_monthly/2013_01_t2m.nc']

In [5]:
def get_date_and_mean_from_one_filename(absolute_path = '/home/hanna/lagrings/ERA5_monthly/2012_01_t2m.nc'):
    """ Computes the mean over the entire domain, only land and only sea
    
    Parameteres
    ----------------
    absolute_path : str
        The absolute path of the file.
        
    Returns 
    ----------------
    date : np.datetime64
        Date of this monthly average
    mean_all : float
        Mean over entire domain
    mean_land : float
        Mean over land
    mean_sea : float
        mean over sea
    """
    basename = os.path.basename(absolute_path)
    date     = np.datetime64('{}-{}'.format( basename[:4], basename[5:7]))
    var      = basename[8:].split('.')[0]
    # Generating all the data and filters. 
    try:
        data     = xr.open_dataset(absolute_path) # read the data
        f_land   = Filter('land').set_data(data = data, variable = var)
        f_sea    = Filter('sea').set_data(data = data, variable = var)

        mean_all  = data.mean()[var].values
        mean_land = f_land.get_mean()
        mean_sea  = f_sea.get_mean()
        return date, mean_all, mean_land, mean_sea
    except OSError:
        print("Didn't find file ... {}".format(absolute_path))
        return date, np.nan, np.nan, np.nan

In [6]:
a,b,c,d = get_date_and_mean_from_one_filename(absolute_path = '/home/hanna/lagrings/ERA5_monthly/2012_01_t2m.nc')

In [7]:
a,b,c,d

(numpy.datetime64('2012-01'),
 array(281.70844, dtype=float32),
 278.0635536854276,
 285.9598052625023)

In [8]:
storage = {}

In [9]:
VARIABLES

['t2m', 'sp', 'q', 'r', 'tcc']

In [None]:
for var in VARIABLES: # VARIABLES[:-1]
    
    alls  = []
    dates = []
    lands = []
    seas  = []
    
    files = get_all_filesnames_from_one_variable(var)
    for i, fil in enumerate(np.sort(files)):
        d, region, land, sea = get_date_and_mean_from_one_filename(fil)
        
        dates.append(d)
        alls.append(region)
        lands.append(land)
        seas.append(sea)
        
        if i%10 == 0:
            print('Number {}. Var {} '.format(i, var))
    
    storage[var] = alls
    storage['land_{}'.format(var)] = lands
    storage['sea_{}'.format(var)] = seas
    storage['date_{}'.format(var)] = dates # just to check that they are equal


Number 0. Var t2m 


In [None]:
data = xr.Dataset(storage)
data.to_netcdf('/home/hanna/lagrings/results/stats/monthly_mean/monthly_means.nc')

In [None]:
data 