## Generate data 

In [1]:
import sys
sys.path.append('/'.join(sys.path[0].split('/')[:-1]))

import os
import xarray as xr
import numpy as np
import time
import matplotlib.pyplot as plt
import healpy as hp

import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader


In [2]:
datadir = "../data/healpix/5.625deg_nearest/"

lr=1e-4
dr=0
batch_size=128
patience=3
train_years=('1979', '2015')
valid_years=('2016', '2016')
test_years=('2017', '2018')
gpu=1
iterative=False

vars = ['z', 't']
kernel_size = 5

In [3]:
z = xr.open_mfdataset(f'{datadir}geopotential_500/*.nc', combine='by_coords')
t = xr.open_mfdataset(f'{datadir}temperature_850/*.nc', combine='by_coords')
ds = xr.merge([z, t], compat='override')

In [4]:
def create_iterative_observations_healpix(ds, lead_time, max_lead_time, nb_timesteps, test_years, nodes):
    
    lead_times = np.arange(lead_time, max_lead_time + lead_time, lead_time)

    data = ds.to_array(dim='level', name='Dataset').transpose('time', 'node', 'level')
    n_samples = data.isel(time=slice(0, -nb_timesteps*lead_time)).shape[0] - max_lead_time

    obs_list = []
    
    print('Generating observations list...')
    for lead in lead_times:
        obs_list.append(data.isel(time=slice(lead, lead + n_samples)).isel(level=slice(0, 2)))

    #observations_numpy = np.array(obs_list)
    observations_joint = xr.concat(obs_list, dim='lead_time')
    #return observations_numpy
    print('Obtaining coordinates...')
    # Lat lon coordinates
    nside = int(np.sqrt(nodes/12))
    out_lon, out_lat = hp.pix2ang(nside, np.arange(nodes), lonlat=True)
    
    print('Generate set of times to study', end='\n')
    # Actual times
    start = np.datetime64(test_years[0], 'h') + np.timedelta64(lead_time, 'h')
    stop = start + np.timedelta64(n_samples, 'h')
    times = np.arange(start, stop)

    # Variables
    var_dict_out = {var: None for var in ['z', 't']}

    das = [];
    lev_idx = 0
   
    for var, levels in var_dict_out.items():
        das.append(observations_joint.isel(level=lev_idx).rename(var))
        lev_idx +=1
        
        """
        
        
        if levels is None:            
            das.append(xr.DataArray(
                observations_numpy[:, :, :, lev_idx],
                dims=['lead_time', 'time', 'node'],
                coords={'lead_time': lead_times, 'time': times, 'node': np.arange(nodes)},
                name=var
            ))
            lev_idx += 1
        # never this case...
        else:
            nlevs = len(levels)
            das.append(xr.DataArray(
                observations_numpy[:, :, :, lev_idx:lev_idx+nlevs],
                dims=['lead_time', 'time', 'node', 'level'],
                coords={'lead_time': lead_times, 'time': valid_time, 'node': nodes, 'level': nlevs},
                name=var
            ))
            lev_idx += nlevs
       """
    
    print('\nGenerate observation...')
    observation_ds = xr.merge(das, compat='override').reset_coords(names='level', drop=True)
    observation_ds = observation_ds.assign_coords({'lat': out_lat, 'lon': out_lon})
    return observation_ds


In [5]:
nodes = 12*16*16
max_lead_time = 5*24
lead_time = 6
out_features = 2
nb_timesteps = 2
nside = int(np.sqrt(nodes/12))

In [6]:
#obs = create_iterative_observations_hp("../data/equiangular/5.625deg/", test_years, lead_time, max_lead_time, nside, nb_timesteps=2)

In [7]:
obs = create_iterative_observations_healpix(ds, lead_time, max_lead_time, nb_timesteps, test_years, nodes)

Generating observations list...
Obtaining coordinates...
Generate set of times to study

Generate observation...


In [8]:
obs

Unnamed: 0,Array,Chunk
Bytes,86.17 GB,4.31 GB
Shape,"(20, 350622, 3072)","(1, 350622, 3072)"
Count,375 Tasks,20 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 86.17 GB 4.31 GB Shape (20, 350622, 3072) (1, 350622, 3072) Count 375 Tasks 20 Chunks Type float32 numpy.ndarray",3072  350622  20,

Unnamed: 0,Array,Chunk
Bytes,86.17 GB,4.31 GB
Shape,"(20, 350622, 3072)","(1, 350622, 3072)"
Count,375 Tasks,20 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,86.17 GB,4.31 GB
Shape,"(20, 350622, 3072)","(1, 350622, 3072)"
Count,375 Tasks,20 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 86.17 GB 4.31 GB Shape (20, 350622, 3072) (1, 350622, 3072) Count 375 Tasks 20 Chunks Type float32 numpy.ndarray",3072  350622  20,

Unnamed: 0,Array,Chunk
Bytes,86.17 GB,4.31 GB
Shape,"(20, 350622, 3072)","(1, 350622, 3072)"
Count,375 Tasks,20 Chunks
Type,float32,numpy.ndarray


In [9]:
obs.to_netcdf(datadir + 'observations.nc')

### Use new functions from Iciar

In [4]:
from modules.test import create_iterative_observations_hp

In [5]:
nodes = 12*16*16
max_lead_time = 5*24
lead_time = 6
out_features = 2
nb_timesteps = 2
nside = int(np.sqrt(nodes/12))

In [10]:
def create_iterative_observations_hp(input_dir, test_years, lead_time, max_lead_time, nside, nb_timesteps=2):
    z500 = xr.open_mfdataset(f'{input_dir}geopotential_500/*.nc', combine='by_coords').sel(time=slice(*test_years))
    t850 = xr.open_mfdataset(f'{input_dir}temperature_850/*.nc', combine='by_coords').sel(time=slice(*test_years))

    test_data = xr.merge([z500, t850], compat='override')


    n_samples = test_data.isel(time=slice(0, -nb_timesteps*lead_time)).dims['time'] - max_lead_time
    nb_iter = max_lead_time // lead_time
    n_pixels = 12*(nside**2)
    print(n_samples)

    # Lead times
    lead_times = np.arange(lead_time, max_lead_time + lead_time, lead_time)

    # Lat lon coordinates
    out_lon, out_lat = hp.pix2ang(nside, np.arange(n_pixels), lonlat=True)

    # Actual times
    start = np.datetime64(test_years[0], 'h') + np.timedelta64(lead_time, 'h')
    stop = start + np.timedelta64(n_samples, 'h')
    times = np.arange(start, stop, 1)

    # Variables
    data_vars = ['z', 't']
    var_dict_out = {var: None for var in data_vars}
    
    data = np.zeros((2, nb_iter, n_samples, 3072))
    
    #data = np.zeros((2, nb_iter, n_samples, 32, 64))
    for i in range(nb_iter):
        #data[0, i, :, :, :] = test_data.z.isel(time=slice(lead_time*(i+1), lead_time*(i+1) + n_samples)).values
        #data[1, i, :, :, :] = test_data.t.isel(time=slice(lead_time*(i+1), lead_time*(i+1) + n_samples)).values
        
        data[0, i, :, :] = test_data.z.isel(time=slice(lead_time*(i+1), lead_time*(i+1) + n_samples)).values
        data[1, i, :, :] = test_data.t.isel(time=slice(lead_time*(i+1), lead_time*(i+1) + n_samples)).values


    das = [];
    lev_idx = 0
    for var in data_vars:       
        das.append(xr.DataArray(
         data[lev_idx, :, :, :],
         dims=['lead_time', 'time', 'node'],
         coords={'lead_time': lead_times, 'time': times, 'node': np.arange(n_pixels)},
         name=var
     ))
        lev_idx += 1
    observations = xr.merge(das)
    #observations = observations.assign_coords({'lat': out_lat, 'lon': out_lon})

    return observations

In [11]:
observations = create_iterative_observations_hp('../data/healpix/5.625deg_nearest/', test_years, lead_time, max_lead_time, nside, nb_timesteps=2)

17388


In [13]:
observations

In [15]:
n_pixels = 12*(nside**2)
out_lon, out_lat = hp.pix2ang(nside, np.arange(n_pixels), lonlat=True)

In [18]:
observations = observations.assign_coords({'lat': ('node', out_lat), 'lon': ('node', out_lon)})

In [19]:
observations.to_netcdf('../data/healpix/predictions/' + 'observations_nearest.nc')