## Generate data 

In [1]:
import sys
sys.path.append('/'.join(sys.path[0].split('/')[:-1]))

import os
import xarray as xr
import numpy as np
import time
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader

from modules.test import create_iterative_observations_healpix

In [2]:
datadir = "../data/healpix/5.625deg/"

lr=1e-4
dr=0
batch_size=128
patience=3
train_years=('1979', '2015')
valid_years=('2016', '2016')
test_years=('2017', '2018')
gpu=1
iterative=False

vars = ['z', 't']
kernel_size = 5

In [3]:
z = xr.open_mfdataset(f'{datadir}geopotential_500/*.nc', combine='by_coords')
t = xr.open_mfdataset(f'{datadir}temperature_850/*.nc', combine='by_coords')
ds = xr.merge([z, t], compat='override')

In [4]:
def create_iterative_observations_healpix(ds, lead_time, max_lead_time, nb_timesteps, test_years, nodes):
    
    lead_times = np.arange(lead_time, max_lead_time + lead_time, lead_time)

    data = ds.to_array(dim='level', name='Dataset').transpose('time', 'node', 'level')
    n_samples = data.isel(time=slice(0, -nb_timesteps*lead_time)).shape[0] - max_lead_time

    obs_list = []
    
    print('Generating observations list...')
    
    for lead in lead_times:
        obs_list.append(data.isel(time=slice(lead, lead + n_samples)).isel(level=slice(0, 2)).values)

    observations_numpy = np.array(obs_list)

    print('Obtaining coordinates...')
    # Lat lon coordinates
    nside = int(np.sqrt(nodes/12))
    out_lon, out_lat = hp.pix2ang(nside, np.arange(nodes), lonlat=True)
    
    print('Generate set of times to study', end='\n')
    # Actual times
    start = np.datetime64(test_years[0], 'h') + np.timedelta64(lead_time, 'h')
    stop = start + np.timedelta64(n_samples, 'h')
    times = np.arange(start, stop)

    # Variables
    var_dict_out = {var: None for var in ['z', 't']}

    das = [];
    lev_idx = 0
   
    i = 0
    for var, levels in var_dict_out.items():
        if levels is None:            
            das.append(xr.DataArray(
                observations_numpy[:, :, :, lev_idx],
                dims=['lead_time', 'time', 'node'],
                coords={'lead_time': lead_times, 'time': times, 'node': np.arange(nodes)},
                name=var
            ))
            lev_idx += 1

        else:
            nlevs = len(levels)
            das.append(xr.DataArray(
                observations_numpy[:, :, :, lev_idx:lev_idx+nlevs],
                dims=['lead_time', 'time', 'node', 'level'],
                coords={'lead_time': lead_times, 'time': valid_time, 'node': nodes, 'level': nlevs},
                name=var
            ))
            lev_idx += nlevs
        print('\r{}'.format(i), end='')
        i += 1
    
    print('\nGenerate observation...')
    observation_ds = xr.merge(das)
    observation_ds = observation_ds.assign_coords({'lat': out_lat, 'lon': out_lon})
    return observation_ds


In [5]:
nodes = 12*16*16
max_lead_time = 5*24
lead_time = 6
out_features = 2
nb_timesteps = 2


In [6]:
obs = create_iterative_observations_healpix(ds, lead_time, max_lead_time, nb_timesteps, test_years, nodes)

Generating observations list...


MemoryError: Unable to allocate 160. GiB for an array with shape (20, 350508, 3072, 2) and data type float32