# Instructions
1. Run the cell below
2. Pick the number of days you want to generate data for: num_days
3. Call make_training_data(days=num_days)
Note: the only year available right now is 1979


In [14]:
import netCDF4 as nc
from os import listdir, system
from os.path import isfile, join, isdir
import re
import numpy as np

def make_time_slice(dataset, time, variables, x=768, y=1152):
    """Gather several variables at a single time index into one array.

    Args:
        dataset: Mapping-like object; ``dataset[name][time]`` must yield the
            field of variable ``name`` at index ``time``.
        time: Index along the leading axis of every variable.
        variables: Sequence of variable names to read, in output order.
        x: Size of the second output axis.
        y: Size of the third output axis.

    Returns:
        Array of shape ``(len(variables), x, y)``.
    """
    # Assumes each field is a 2-D (x, y) array -- TODO confirm for new inputs.
    fields = []
    for name in variables:
        fields.append(dataset[name][time])

    # Stack the fields row-wise, then split the rows back out per variable.
    stacked = np.vstack(fields)
    return stacked.reshape((len(variables), x, y))

def make_spatiotemporal_tensor(dataset, num_time_slices, variables, x=768, y=1152):
    """Stack the first ``num_time_slices`` time slices of a dataset.

    Args:
        dataset: Mapping-like object indexed as ``dataset[name][time]``.
        num_time_slices: Number of leading time indices (0..n-1) to read.
        variables: Sequence of variable names to read, in output order.
        x: Size of the third output axis.
        y: Size of the fourth output axis.

    Returns:
        Array of shape ``(num_time_slices, len(variables), x, y)``.
    """
    # Forward x and y so the per-slice reshape agrees with the reshape below;
    # otherwise a non-default grid size fails inside make_time_slice.
    time_slices = [
        make_time_slice(dataset, time, variables, x, y)
        for time in range(num_time_slices)
    ]
    tensor = np.vstack(time_slices).reshape(num_time_slices, len(variables), x, y)
    return tensor

def make_training_data(year=1979, days=5):
    """Load up to ``days`` daily CAM5 files for ``year`` into one array.

    Files named ``cam5_*.nc`` in the year's directory are taken in sorted
    filename order, and the first ``days`` of them are read.

    Args:
        year: Sub-directory of the data root to read from.
        days: Maximum number of files to load.

    Returns:
        Array of shape ``(n_files, 8, 16, 768, 1152)``, where ``n_files`` is
        ``min(days, number of matching files)``.

    Raises:
        ValueError: If no matching file is selected (none found, or
            ``days`` is 0).
    """
    # this directory can be accessed from cori
    maindir = join(
        "/project/projectdirs/dasrepo/gordon_bell/climate/data/big_images/",
        str(year),
    )

    rpfile = re.compile(r"^cam5_.*\.nc$")
    # listdir() returns entries in arbitrary order; sort so the same files
    # are picked on every run.
    # NOTE(review): presumably filenames sort chronologically -- confirm.
    camfiles = sorted(f for f in listdir(maindir) if rpfile.match(f))
    selected = camfiles[:days]
    if not selected:
        raise ValueError(
            "no cam5_*.nc files selected from %s (days=%r)" % (maindir, days)
        )

    variables = [u'PRECT',
                 u'PS',
                 u'PSL',
                 u'QREFHT',
                 u'T200',
                 u'T500',
                 u'TMQ',
                 u'TREFHT',
                 u'TS',
                 u'U850',
                 u'UBOT',
                 u'V850',
                 u'VBOT',
                 u'Z1000',
                 u'Z200',
                 u'ZBOT']
    time_steps = 8
    x = 768
    y = 1152

    day_slices = []
    for camfile in selected:
        dataset = nc.Dataset(join(maindir, camfile), "r", format="NETCDF4")
        try:
            # The helper stacks the data into a new array, so the file
            # handle can be released as soon as this returns.
            day_slices.append(
                make_spatiotemporal_tensor(dataset, time_steps, variables, x, y)
            )
        finally:
            dataset.close()

    tr_data = np.vstack(day_slices).reshape(
        len(day_slices), time_steps, len(variables), x, y
    )

    return tr_data

In [12]:
# Build a training array from two data files of the default year (1979).
x_tr=make_training_data(days=2)

In [13]:
# Parenthesised form prints the same tuple on Python 2 and Python 3.
print(x_tr.shape)

(2, 8, 16, 768, 1152)
