In [None]:
from seqdata.core import *
from seqdata.model import *
from fastai2.basics import *
from fastai2.callback.progress import *

In [None]:
#hide
%load_ext line_profiler
%load_ext snakeviz

In [None]:
hdf_files = get_hdf_files('test_data/')
# List-of-dicts items: per the output below, each item holds a file path plus l_slc/r_slc
# bounds of a 1001-sample window, with consecutive windows starting 30 samples apart.
src = source_items(hdf_files,[DfHDFCreateWindows(win_sz=1000+1,stp_sz=30,clm='current')])
# DataFrame counterpart of `src`. Later cells read `src_df`, which was never defined in
# this notebook, so a fresh kernel run failed with a NameError.
# NOTE(review): reconstructed from the df_source_items call further down (there with stp_sz=1) - confirm.
src_df = df_source_items(hdf_files,[DfHDFCreateWindows(win_sz=1000+1,stp_sz=30,clm='current')])
src[:2]

[{'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 0,
  'r_slc': 1001},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 30,
  'r_slc': 1031}]

In [None]:
# Two transform pipelines applied to every item: the first reads the 'current' and
# 'voltage' columns, the second reads 'voltage' only.
# NOTE(review): presumably pipeline 1 is the model input and pipeline 2 the target - confirm.
tfms=[  [HDF2Sequence(['current','voltage'])],
        [HDF2Sequence(['voltage'])]]
# The splitter is fed the file path of every window item.
splits = ParentSplitter()([x['path'] for x in src])
dsrc = DataSource(src,tfms=tfms,splits=splits)

In [None]:
%%timeit
dsrc[0]

144 µs ± 769 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
%%snakeviz
dsrc[0]

 
*** Profile stats marshalled to file '/tmp/tmpb3lutpaq'. 
Embedding SnakeViz in this document...


`HDF2Sequence` costs practically no compute time; the transforms and the DataFrame iteration account for almost all of the time.

# Test Type Transform Runtime
Fastai2 makes a lot of function calls, which consume a lot of time. Reduce the transform complexity to analyse the overhead.

## getitem performance baseline
Getting items from lists and arrays is very fast.

In [None]:
l = [0]*10000

In [None]:
%%timeit
l[0]

38.8 ns ± 0.218 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [None]:
a = np.array(l)

In [None]:
%%timeit
a[0]

183 ns ± 1.15 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


## getitem datasource without transforms
Without any transformations, there is only the calling overhead of the DataSource.

In [None]:
dsrc = DataSource(l)

In [None]:
%%timeit
dsrc[0]

64.7 µs ± 1.63 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


It already takes more than 1000 times longer than indexing a list directly.

## getitem datasource with noop transforms
The first Transform is practically free, and every additional transform adds 50-60 µs to the execution time without doing anything.

In [None]:
tfms=[[noop]]
dsrc = DataSource(l,tfms=tfms)

In [None]:
%%timeit
dsrc[0]

59.9 µs ± 130 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
tfms=[[noop]]*2
dsrc = DataSource(src_df.path,tfms=tfms)

In [None]:
%%timeit
dsrc[0]

156 µs ± 919 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
tfms=[[noop]*2]*2
dsrc = DataSource(l,tfms=tfms)

In [None]:
%%timeit
dsrc[0]

221 µs ± 2.01 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [None]:
tfms=[[noop]*3]*2
dsrc = DataSource([0]*1000,tfms=tfms)

In [None]:
%%timeit
dsrc[0]

326 µs ± 1.77 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [None]:
tfms=[[noop]*3]*2
dsrc = DataSource(src_df.path,tfms=tfms)

In [None]:
%%timeit
dsrc[0]

368 µs ± 3.16 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [None]:
tl = TfmdList(l,tfms=[noop,noop])

In [None]:
%%timeit
tl[0]

109 µs ± 1.07 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
def rec_test(i):
    """Return 2**i for positive integer `i` (1 otherwise) using one nested call per step.

    Kept deliberately recursive: the cell that times it uses the chain of
    plain Python calls as a reference point for call overhead.
    """
    if i > 0:
        return 2 * rec_test(i - 1)
    return 1

In [None]:
%%timeit
rec_test(300)

55.2 µs ± 591 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


## Profiling Datasource

In [None]:
dsrc = DataSource(src)

In [None]:
%%timeit
dsrc[0]

58.2 µs ± 398 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
# `tfms` was reassigned to noop pipelines by the experiments above, so on a fresh
# top-to-bottom run this cell would silently profile noops. Rebuild the HDF2Sequence
# pipelines here so the DataSource (and the batches built from it below) use real data.
tfms=[  [HDF2Sequence(['current','voltage'])],
        [HDF2Sequence(['voltage'])]]
dsrc = DataSource(src,tfms=tfms,splits=splits)

In [None]:
%%timeit
dsrc[0]

131 µs ± 726 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
# Alternative configuration with noise injection, kept for reference:
# db = dsrc.databunch(bs=128,after_batch=[SeqNoiseInjection(std=[1.1,0.01]),Normalize(axes=[0,1])])
# Build a databunch with batch size 128, passing Cuda() and Normalize as after_batch
# transforms, then display the shape of the first element of one batch.
db = dsrc.databunch(bs=128,after_batch=[Cuda(),Normalize(axes=[0,1])])
db.one_batch()[0].shape

torch.Size([128, 1001, 2])

In [None]:
dl = DataLoader(dsrc,bs=2)
dl.one_batch()
db = DataBunch(dl,dl)

In [None]:
dsrc.databunch()

AttributeError: can't set attribute

In [None]:
%%timeit
db.one_batch()

62.7 ms ± 130 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
lrn = Learner(db,SimpleGRU(2,1),loss_func=nn.MSELoss())

In [None]:
# %%prun -s cumulative -l 20 -D batch.prof
lrn.fit(3)

epoch,train_loss,valid_loss,time
0,0.019329,0.010518,00:11
1,0.008568,0.007144,00:11
2,0.006025,0.00536,00:11


In [None]:
%lprun?

In [None]:
%%prun -s cumulative -l 20 -D batch.prof
db.one_batch()

 
*** Profile stats marshalled to file 'batch.prof'. 


In [None]:
%%time
%lprun -f dsrc.tfms[1][0].init_enc -f dsrc.tls.__getitem__ db.one_batch()

CPU times: user 677 ms, sys: 5.59 ms, total: 682 ms
Wall time: 680 ms


In [None]:
%lprun -f DataLoader.one_batch db.one_batch()

In [None]:
%%timeit
dsrc[0]

UsageError: Cell magic `%%lprun` not found (But line magic `%lprun` exists, did you mean that instead?).


In [None]:
#hide
from nbdev.export import *
notebook2script()

# Test Transform Performance

## Comparison function vs transform

In [None]:
def testfun(x): return 2*x

In [None]:
%%timeit
testfun(1)

95.7 ns ± 3.42 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [None]:
testfun2 = Transform(testfun)

In [None]:
%%timeit
testfun2(1)

52.7 µs ± 261 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


The Transform version of the same function is far slower, by a factor of roughly 500 (52.7 µs vs. 95.7 ns).
It seems that a Transform has a fixed overhead of about 50 µs.

In [None]:
t = Transform()

In [None]:
%%timeit
t(1)

44.7 µs ± 386 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
%lprun -f Transform._do_call t(1)

In [None]:
%lprun -f  anno_ret [t(1) for _ in range(100)]

In [None]:
%%prun -s cumulative -l 20 -D batch.prof
[t(1) for _ in range(100)]

 
*** Profile stats marshalled to file 'batch.prof'. 


In [None]:
Transform??

In [None]:
TypeDispatch.returns_none??

In [None]:
%%timeit
t._call('encodes', 1)

44 µs ± 159 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
%%timeit
t.encodes(1)

31 µs ± 210 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
%%prun -s cumulative -l 20 -D batch.prof
testfun2.encodes(1)

 
*** Profile stats marshalled to file 'batch.prof'. 


In [None]:
t.encodes.returns_none

<bound method TypeDispatch.returns_none of >

# Pandas vs Dictionary Performance

In [None]:
tfms=[  [HDF2Sequence(['current','voltage'])],
        [HDF2Sequence(['voltage'])]]
dsrc = DataSource(src_df.path,tfms=tfms,splits=ParentSplitter()(src_df.path))

In [None]:
%%snakeviz
dsrc[0]

 
*** Profile stats marshalled to file '/tmp/tmp0os9k4jc'. 
Embedding SnakeViz in this document...


In [None]:
l_dic = [{'path':'/mnt/Data/Systemidentification/','l_slc':10,'r_slc':100}]*1000

In [None]:
%%timeit
[x for x in src_df.iloc[:1000]]

160 µs ± 1 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
%%timeit
[x for x in l_dic]

23.3 µs ± 72 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
%%timeit
src_df.iloc[0]

144 µs ± 296 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
%%timeit
src_df.path.values[0]

6.55 µs ± 81.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [None]:
%%timeit
src_df.values[0]

2.33 ms ± 16.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
%%timeit
l_dic[0]

36.7 ns ± 0.0212 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [None]:
HDF2Sequence??

## Attribute Checking

In [None]:
item =src_df.iloc[0]

In [None]:
%%timeit
hasattr(item,'l_slc')

12.8 µs ± 173 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [None]:
%%timeit

type(item) is dict

149 ns ± 0.589 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [None]:
item = l_dic[0]

In [None]:
%%timeit
'l_slc' in item

61 ns ± 10 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


Pandas DataFrames are very slow in production. Converting them to dictionaries is a very fast and efficient workaround.

In [None]:
%%timeit
src_df.to_dict(orient='records')

491 ms ± 3.86 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
len(src_df.to_dict(orient='records'))

26461

In [None]:
l = df_source_items(hdf_files,[DfHDFCreateWindows(win_sz=1000+1,stp_sz=1,clm='current')])

In [None]:
%%time
l.to_dict(orient='records')

CPU times: user 15.7 s, sys: 123 ms, total: 15.8 s
Wall time: 15.8 s


[{'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 0,
  'r_slc': 1001},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 1,
  'r_slc': 1002},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 2,
  'r_slc': 1003},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 3,
  'r_slc': 1004},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 4,
  'r_slc': 1005},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 5,
  'r_slc': 1006},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 6,
  'r_slc': 1007},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 7,
  'r_slc': 1008},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 8,
  'r_slc': 1009},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 9,
  'r_slc': 1010},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 10,
  'r_

In [None]:
d = l.to_dict(orient='records')

In [None]:
%%timeit
d[0].get('paths',None)

113 ns ± 3.41 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [None]:
%%timeit
x = d[0]
x['paths'] if 'paths' in x else None

67.8 ns ± 0.664 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [None]:
len(l)

793798

In [None]:
%%time
[x for x in l]

CPU times: user 18.1 ms, sys: 3.9 ms, total: 22 ms
Wall time: 21.3 ms


[{'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 0,
  'r_slc': 1001},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 1,
  'r_slc': 1002},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 2,
  'r_slc': 1003},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 3,
  'r_slc': 1004},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 4,
  'r_slc': 1005},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 5,
  'r_slc': 1006},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 6,
  'r_slc': 1007},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 7,
  'r_slc': 1008},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 8,
  'r_slc': 1009},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 9,
  'r_slc': 1010},
 {'path': Path('test_data/train/Sim_RealisticCycle2.hdf5'),
  'l_slc': 10,
  'r_