In [1]:
import numpy as np
import pandas as pd

r = 0.05  # constant short rate
sigma = 0.5  # volatility factor


def generate_sample_data(rows, cols, freq='1min', seed=None):
    '''
    Function to generate sample financial data.

    Simulates ``cols`` geometric Brownian motion paths with ``rows``
    observations each, using the module-level short rate ``r`` and
    volatility ``sigma``, and rescales every path to start at 100.

    Parameters
    ==========
    rows: int
        number of rows to generate (0 yields an empty DataFrame)
    cols: int
        number of columns to generate
    freq: str
        frequency string for DatetimeIndex
    seed: int or None
        optional seed for a dedicated random number generator;
        if None (default), the numbers are drawn from NumPy's global
        random state

    Returns
    =======
    df: DataFrame
        DataFrame object with the sample data
    '''
    rows = int(rows)
    cols = int(cols)
    # generate a DatetimeIndex object given the frequency
    index = pd.date_range('2017-1-1', periods=rows, freq=freq)
    # determine time delta in year fractions; it is taken from a separate
    # two-point range so that rows < 2 does not raise an IndexError
    step = pd.date_range('2017-1-1', periods=2, freq=freq)
    dt = (step[1] - step[0]) / pd.Timedelta(value='365D')
    # generate column names
    columns = ['No%d' % i for i in range(cols)]
    # draw the standard normally distributed shocks
    if seed is None:
        shocks = np.random.standard_normal((rows, cols))
    else:
        shocks = np.random.RandomState(seed).standard_normal((rows, cols))
    # generate sample paths for geometric Brownian motion
    raw = np.exp(np.cumsum((r - 0.5 * sigma ** 2) * dt +
                           sigma * np.sqrt(dt) * shocks, axis=0))
    # normalize the data to start at 100 (an empty array has no first row)
    if rows > 0:
        raw = raw / raw[0] * 100
    # generate the DataFrame object
    df = pd.DataFrame(raw, index=index, columns=columns)
    return df

In [2]:
# small demo: five daily observations for three simulated series
rows, columns, freq = 5, 3, 'D'
print(generate_sample_data(rows=rows, cols=columns, freq=freq))


                   No0         No1         No2
2017-01-01  100.000000  100.000000  100.000000
2017-01-02  100.130459   99.419125   99.519175
2017-01-03   99.314039   99.650937   99.986160
2017-01-04   93.594397  105.278061  103.078546
2017-01-05   94.273752  108.330258  103.455386


In [3]:
# 2.5 million one-second observations for five series, rounded to two decimals
data = generate_sample_data(
    rows=2.5e6,
    cols=5,
    freq='1s',
).round(decimals=2)

In [16]:
# print a summary of the frame: index range, column dtypes and memory usage
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2500000 entries, 2017-01-01 00:00:00 to 2017-01-29 22:26:39
Freq: S
Data columns (total 5 columns):
No0    float64
No1    float64
No2    float64
No3    float64
No4    float64
dtypes: float64(5)
memory usage: 114.4 MB


In [6]:
import tstables
import tables as tb

In [7]:
class desc(tb.IsDescription):
    ''' Description of TsTables table structure.

    Declares one int64 ``timestamp`` column followed by five float64
    columns ``No0`` .. ``No4``; the ``pos`` arguments fix the column order.
    The value column names follow the ``'No%d'`` pattern used for the
    columns of the sample DataFrame.
    '''
    # time stamp of each record, stored as a 64-bit integer
    # NOTE(review): presumably populated by TsTables from the DatetimeIndex;
    # confirm the required column name and units against the TsTables docs
    timestamp = tb.Int64Col(pos=0)
    # one float64 column per data series
    No0 = tb.Float64Col(pos=1)
    No1 = tb.Float64Col(pos=2)
    No2 = tb.Float64Col(pos=3)
    No3 = tb.Float64Col(pos=4)
    No4 = tb.Float64Col(pos=5)

In [10]:
# open (and truncate/create) the HDF5 database file in write mode
# NOTE(review): make sure h5.close() is called once all data is written
h5 = tb.open_file('data.h5', mode='w')

In [11]:
# create the time series node 'data' below the root group '/', with rows
# laid out according to the desc description class
# NOTE(review): create_ts is presumably added to the PyTables File object
# by importing tstables -- confirm against the TsTables docs
ts = h5.create_ts('/', 'data', desc)
# the database file was opened via the relative path 'data.h5'

In [15]:
# display the file object to inspect the node hierarchy created so far
h5

File(filename=data.h5, title='', mode='w', root_uep='/', filters=Filters(complevel=0, shuffle=False, bitshuffle=False, fletcher32=False, least_significant_digit=None))
/ (RootGroup) ''
/data (Group/Timeseries) ''
/data/y2017 (Group) ''
/data/y2017/m03 (Group) ''
/data/y2017/m03/d30 (Group) ''
/data/y2017/m03/d30/ts_data (Table(0,)) ''
  description := {
  "timestamp": Int64Col(shape=(), dflt=0, pos=0),
  "No0": Float64Col(shape=(), dflt=0.0, pos=1),
  "No1": Float64Col(shape=(), dflt=0.0, pos=2),
  "No2": Float64Col(shape=(), dflt=0.0, pos=3),
  "No3": Float64Col(shape=(), dflt=0.0, pos=4),
  "No4": Float64Col(shape=(), dflt=0.0, pos=5)}
  byteorder := 'little'
  chunkshape := (1365,)

In [13]:
# preview only the first rows instead of rendering the whole
# 2.5 million row DataFrame
data.head(10)

Unnamed: 0,No0,No1,No2,No3,No4
2017-01-01 00:00:00,100.00,100.00,100.00,100.00,100.00
2017-01-01 00:00:01,99.99,100.00,100.02,99.98,100.00
2017-01-01 00:00:02,99.98,100.00,100.02,99.98,99.99
2017-01-01 00:00:03,99.97,100.00,100.00,99.99,99.99
2017-01-01 00:00:04,99.97,100.01,100.01,99.99,99.98
2017-01-01 00:00:05,99.97,100.02,100.00,99.99,99.96
2017-01-01 00:00:06,99.96,100.01,100.00,99.99,99.98
2017-01-01 00:00:07,99.97,100.01,100.01,99.99,99.97
2017-01-01 00:00:08,99.97,99.99,100.00,99.99,99.96
2017-01-01 00:00:09,99.98,99.99,99.99,99.98,99.95
