# I/O Performance for fixed array sizes

```python
# Benchmark test names; `testname` below is set to the first of these.
testnames = [
    "test_large_fraction_changes_sparse",
    "test_mostly_appends_sparse",
    "test_small_fraction_changes_sparse",
    "test_mostly_appends_dense",
]
```

In [15]:
# NOTE(review): absolute, machine-specific path. No other cell visible in this
# notebook reads `path` — confirm whether it is still needed.
path = "/home/melissa/projects/versioned-hdf5"

In [16]:
%matplotlib inline
import os
import sys
sys.path.append('..')
import pickle
import numpy as np
import matplotlib.pyplot as plt
import h5py
import datetime
from versioned_hdf5 import VersionedHDF5File
from generate_data import TestVersionedDatasetPerformance

To ensure reproducibility, seed NumPy's random number generator:

In [17]:
# Seed NumPy's global random state so the np.random.rand / np.random.randint
# calls in later cells draw a repeatable sequence.
np.random.seed(1)

# Creating files

In [48]:
# Benchmark scenario; also used as the prefix of the .h5 file name and of the
# dataset paths in later cells.
testname = "test_large_fraction_changes_sparse"
# Number of transactions passed to the generator. The bracketed list looks like
# other sizes that were tried — TODO confirm.
num_transactions = 1000 #[50, 100, 500, 1000, 2000, 5000, 10000, 20000]

Create a new file with `num_transactions` versions:

In [50]:
%%time 
TestVersionedDatasetPerformance().test_large_fraction_changes_sparse(num_transactions)

CPU times: user 11.1 s, sys: 409 ms, total: 11.5 s
Wall time: 11.6 s


In [51]:
# Number of rows written to each dataset when a new version is staged below.
num_rows_initial = 5000

# Open the generated file in read/write mode and wrap it in VersionedHDF5File.
filename = f"{testname}_{num_transactions}.h5"
h5pyfile = h5py.File(filename, mode='r+')
vfile = VersionedHDF5File(h5pyfile)

# Number of entries in `vfile._versions` (shown as the cell output).
len(vfile._versions)

1002

## Adding new version to existing set

In [52]:
%%time
tt = datetime.datetime.utcnow()
with vfile.stage_version(str(tt)) as group:
    key0 = group.create_dataset(testname + '/key0', data=np.random.rand(num_rows_initial), dtype=(np.dtype('int64')))
    key1 = group.create_dataset(testname + '/key1', data=np.random.rand(num_rows_initial), dtype=(np.dtype('int64')))
    val = group.create_dataset(testname + '/val', data=np.random.rand(num_rows_initial), dtype=(np.dtype('float64')))
    tt = datetime.datetime.utcnow()

CPU times: user 34.5 ms, sys: 161 µs, total: 34.7 ms
Wall time: 33.2 ms


In [53]:
# Record the version count after the new version was staged, release the
# read/write handle, and report the count.
N = len(vfile._versions)
h5pyfile.close()
print(N)

1003


## Reading in sequential mode

In [54]:
# Reopen the same file read-only for the read timings that follow.
h5pyfile = h5py.File(filename, mode='r')
vfile = VersionedHDF5File(h5pyfile)

In [55]:
%%time
for vname in vfile._versions:
    if vname != '__first_version__':
        version = vfile[vname]
        group_key = list(version.keys())[0]
        val = version[group_key]['val']

CPU times: user 856 ms, sys: 57 ms, total: 913 ms
Wall time: 906 ms


## Reading specific version

In [56]:
# Pick a random version to read back. '__first_version__' is excluded from the
# candidates because the timed cell below skips it, which would leave nothing
# to time. The candidate count is taken from the open file rather than from
# `N`, which was computed in an earlier cell.
version_names = [name for name in vfile._versions if name != '__first_version__']
index = np.random.randint(0, len(version_names))
vname = version_names[index]

In [57]:
# Display the name of the version that was selected.
vname

'2020-03-09 18:57:31.621279'

In [58]:
%%time
# Time a single lookup of the 'val' dataset in the selected version.
# Nothing is timed when vname is '__first_version__'.
if vname != '__first_version__':
    version = vfile[vname]
    # Use the first key of the version as the group holding the datasets.
    group_key = list(version.keys())[0]
    # NOTE(review): this fetches the dataset object; whether any array data is
    # actually read here depends on versioned_hdf5/h5py — confirm.
    val = version[group_key]['val']

CPU times: user 1.09 ms, sys: 77 µs, total: 1.17 ms
Wall time: 1.15 ms


# Finishing up

In [59]:
# Release the read-only file handle opened for the read timings.
h5pyfile.close()