In [36]:
from dask import delayed
from dask.utils import SerializableLock
from datetime import datetime, timedelta
from floater.generators import FloatSet
import bcolz
import numpy as np
import os, re
import pandas as pd

In [2]:
fname = '/data/scratch/rpa/global_lagrangian/output/int_090/traj_0090-0180.bcolz'

In [3]:
fs = FloatSet(xlim=(180, 230), ylim=(-80, 80), dx=0.03125, dy=0.03125)

In [4]:
Npart = fs.Nx*fs.Ny
Npart

8192000

In [5]:
datadir = os.path.dirname(fname)
datadir

'/data/scratch/rpa/global_lagrangian/output/int_090'

In [6]:
basename = os.path.splitext(os.path.basename(fname))[0]
basename

'traj_0090-0180'

In [7]:
day0, day1 = [int(day) for day in re.search('traj_(\d+)-(\d+)', basename).groups()]
day0, day1

(90, 180)

In [8]:
days = np.arange(day1-day0)
days

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89])

In [9]:
delta_t=86400
delta_t

86400

In [10]:
times = days*delta_t
times

array([      0,   86400,  172800,  259200,  345600,  432000,  518400,
        604800,  691200,  777600,  864000,  950400, 1036800, 1123200,
       1209600, 1296000, 1382400, 1468800, 1555200, 1641600, 1728000,
       1814400, 1900800, 1987200, 2073600, 2160000, 2246400, 2332800,
       2419200, 2505600, 2592000, 2678400, 2764800, 2851200, 2937600,
       3024000, 3110400, 3196800, 3283200, 3369600, 3456000, 3542400,
       3628800, 3715200, 3801600, 3888000, 3974400, 4060800, 4147200,
       4233600, 4320000, 4406400, 4492800, 4579200, 4665600, 4752000,
       4838400, 4924800, 5011200, 5097600, 5184000, 5270400, 5356800,
       5443200, 5529600, 5616000, 5702400, 5788800, 5875200, 5961600,
       6048000, 6134400, 6220800, 6307200, 6393600, 6480000, 6566400,
       6652800, 6739200, 6825600, 6912000, 6998400, 7084800, 7171200,
       7257600, 7344000, 7430400, 7516800, 7603200, 7689600])

In [11]:
date0=datetime(1993,1,1)
date0

datetime.datetime(1993, 1, 1, 0, 0)

In [12]:
refdate = date0 + timedelta(days=day0)
refdate

datetime.datetime(1993, 4, 1, 0, 0)

In [13]:
bc = bcolz.open(rootdir=fname, mode='r')
bc

ctable((753664023,), [('npart', '<f4'), ('time', '<f4'), ('x', '<f4'), ('y', '<f4'), ('vort', '<f4')])
  nbytes: 14.04 GB; cbytes: 6.11 GB; ratio: 2.30
  cparams := cparams(clevel=5, shuffle=True, cname='blosclz')
  rootdir := '/data/scratch/rpa/global_lagrangian/output/int_090/traj_0090-0180.bcolz'
[ (1297586.0, 4492800.0, 247.6322479248047, -56.92790603637695, 2.779736178126768e-06)
 (1297587.0, 4492800.0, 247.60421752929688, -57.070072174072266, 4.394159077492077e-06)
 (1297567.0, 4492800.0, 247.61593627929688, -56.94962692260742, 3.0349401640705764e-06)
 ...,
 (5060411.0, 7862400.0, 216.6854705810547, 19.999229431152344, -3.2957746043393854e-06)
 (4105022.0, 7862400.0, 215.76144409179688, 0.00031462014885619283, 1.5322804074457963e-06)
 (3828462.0, 7862400.0, 215.23580932617188, 0.000548539450392127, 3.845694209303474e-06)]

In [14]:
nt = len(times)
nt

90

In [15]:
num_floats = Npart
num_floats

8192000

In [16]:
npart = np.arange(1, num_floats+1)
npart

array([      1,       2,       3, ..., 8191998, 8191999, 8192000])

In [17]:
npart_range = npart[0], npart[-1]+1
npart_range

(1, 8192001)

In [18]:
lock = SerializableLock()
lock

<SerializableLock: 6c2b1432-a8bd-4da7-a03f-047c4803b8a2>

In [19]:
dtype = 'f4'
dtype

'f4'

In [20]:
npart_min, npart_max = npart_range
npart_min, npart_max

(1, 8192001)

In [21]:
time = times[0]
time

0

In [22]:
query = '(time==%g) & (npart>=%g) & (npart<=%g)' % (time, npart_min, npart_max)
query

'(time==0) & (npart>=1) & (npart<=8.192e+06)'

In [23]:
if lock is not None:
    lock.acquire()

In [24]:
df = pd.DataFrame(bc[query])
df.head()

Unnamed: 0,npart,time,x,y,vort
0,1025441,0,225.023438,-59.984375,1.224266e-06
1,1025442,0,225.054688,-59.984375,1.114438e-06
2,1025443,0,225.085938,-59.984375,8.762393e-07
3,1025444,0,225.117188,-59.984375,6.380402e-07
4,1025445,0,225.148438,-59.984375,3.998412e-07


In [30]:
df = df.set_index('npart', drop=True, verify_integrity=True)
df.head()

Unnamed: 0_level_0,time,x,y,vort
npart,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1025441,0,225.023438,-59.984375,1.224266e-06
1025442,0,225.054688,-59.984375,1.114438e-06
1025443,0,225.085938,-59.984375,8.762393e-07
1025444,0,225.117188,-59.984375,6.380402e-07
1025445,0,225.148438,-59.984375,3.998412e-07


In [31]:
df = df.reindex(npart)
df.head()

Unnamed: 0_level_0,time,x,y,vort
npart,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,0,180.023438,-79.984375,-5.316531e-10
2,0,180.054688,-79.984375,-5.323152e-10
3,0,180.085938,-79.984375,-5.330137e-10
4,0,180.117188,-79.984375,-5.337122e-10
5,0,180.148438,-79.984375,-5.344107e-10


In [34]:
fields=['x', 'y', 'vort']
fields

['x', 'y', 'vort']

In [35]:
data = df[fields].values
data

array([[  1.80023438e+02,  -7.99843750e+01,  -5.31653110e-10],
       [  1.80054688e+02,  -7.99843750e+01,  -5.32315192e-10],
       [  1.80085938e+02,  -7.99843750e+01,  -5.33013689e-10],
       ..., 
       [  2.29914062e+02,   7.99843750e+01,   6.34674995e-12],
       [  2.29945312e+02,   7.99843750e+01,   6.36987295e-12],
       [  2.29976562e+02,   7.99843750e+01,   6.88776553e-12]], dtype=float32)

In [None]:
delayed(bcolz_to_arrays)(bc, time, npart_range, fields, lock=lock, dtype=dtype), (num_floats, len(fields))