In [22]:
import pandas as pd
import numpy as np
from numba import njit

In [23]:
def random_dates(start, end, n=1000000):
    """Draw ``n`` random timestamps in ``[start, end)`` and return them sorted.

    Timestamps are sampled at microsecond resolution with
    ``np.random.randint``, i.e. from NumPy's global random state.

    Parameters
    ----------
    start, end : pandas.Timestamp
        Bounds of the sampling window; ``start`` is inclusive and ``end`` is
        exclusive. ``end`` must be later than ``start``.
    n : int, default 1000000
        Number of timestamps to draw.

    Returns
    -------
    list of pandas.Timestamp
        The sampled timestamps in ascending order.
    """
    # Timestamp.value is nanoseconds since the epoch; floor-divide so the
    # bounds stay exact integer microseconds instead of passing through floats.
    start_us = start.value // 1000
    end_us = end.value // 1000

    # Sort the raw int64 values in NumPy before building Timestamps: this is
    # far cheaper than sorting a million Timestamp objects in Python.
    sampled_us = np.sort(np.random.randint(start_us, end_us, n, dtype=np.int64))

    return list(pd.to_datetime(sampled_us, unit='us'))


# Seed NumPy's global RNG so the synthetic tick data (the timestamps drawn
# here and anything drawn from np.random afterwards) is reproducible on
# Restart & Run All.
np.random.seed(42)

# Sampling window for the synthetic ticks: 2015-01-01 up to 2015-03-01.
start = pd.to_datetime('2015-01-01')
end = pd.to_datetime('2015-03-01')
times = random_dates(start, end)

In [61]:
# Synthetic trades: one log-normally distributed price and one integer size
# drawn from [100, 5000) per timestamp, indexed by trade time.
n_ticks = len(times)
prices = np.random.lognormal(4, 0.03, n_ticks)
sizes = np.random.randint(100, 5000, n_ticks)
df = pd.DataFrame(
    {"times": times, "prices": prices, "sizes": sizes}
).set_index("times")

In [81]:
# Time bars: bucket ticks into one-minute intervals, taking the OHLC of the
# prices and the summed size within each interval.
time_bar_df = (
    df.groupby(pd.Grouper(freq="1min"))
    .agg({'prices': 'ohlc', 'sizes': 'sum'})
)
# Keep only the inner column level (open/high/low/close/sizes).
time_bar_df.columns = time_bar_df.columns.get_level_values(1)
time_bar_df.head()

Unnamed: 0_level_0,open,high,low,close,sizes
times,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-01 00:00:00,54.673465,56.450564,51.810608,55.981287,32513
2015-01-01 00:01:00,51.730745,54.515736,51.59027,53.16195,12303
2015-01-01 00:02:00,54.565729,57.512744,52.475725,54.605392,45481
2015-01-01 00:03:00,52.692532,55.927476,51.05204,55.579325,21744
2015-01-01 00:04:00,55.885854,56.488472,52.097841,55.647111,31551


In [66]:
@njit
def bar(xs, y):
    """Label each value in ``xs`` with a multiple of ``y``.

    Every element is divided by ``y``, cast to an ``int64`` bucket index
    (dropping the fractional part), and scaled back up by ``y``, so values
    that fall in the same ``y``-wide bucket receive the same label.
    """
    bucket_index = (xs / y).astype(np.int64)
    return bucket_index * y

In [70]:
# Tick bars: every 10 consecutive ticks form one bar, labelled by the
# position of the bar's first tick (0, 10, 20, ...).
tick_bar_ids = bar(np.arange(len(df)), 10)
tick_bar_df = df.groupby(tick_bar_ids).agg({'prices': 'ohlc', 'sizes': 'sum'})
# Keep only the inner column level (open/high/low/close/sizes).
tick_bar_df.columns = tick_bar_df.columns.get_level_values(1)
tick_bar_df.head()

Unnamed: 0,open,high,low,close,sizes
0,54.673465,56.450564,51.810608,54.463559,28961
10,55.981287,56.727351,51.59027,55.033733,28695
20,54.067034,57.512744,52.475725,52.475725,25382
30,52.722234,56.364488,51.05204,53.384098,20454
40,54.715569,56.488472,52.423026,52.423026,29662


In [82]:
# Volume bars: label each tick with the running total of traded size,
# truncated to a multiple of 100000; ticks sharing a label form one bar.
cumulative_size = np.cumsum(df['sizes'].to_numpy(dtype=np.int64))
volume_bar_ids = bar(cumulative_size, 100000)
volume_bar_df = df.groupby(volume_bar_ids).agg({'prices': 'ohlc', 'sizes': 'sum'})
# Keep only the inner column level (open/high/low/close/sizes).
volume_bar_df.columns = volume_bar_df.columns.get_level_values(1)
volume_bar_df.head()

Unnamed: 0,open,high,low,close,sizes
0,54.673465,57.512744,51.05204,53.796713,99393
100000,53.384098,57.098199,49.126217,56.580686,99398
200000,52.597644,58.258751,50.494419,52.98679,99067
300000,55.082085,57.453189,51.041228,55.846594,99743
400000,55.448805,57.302612,50.973233,52.461393,101253


In [83]:
# Dollar bars: label each tick with the running traded value (size * price),
# truncated to a multiple of 1000000; ticks sharing a label form one bar.
# The running total is accumulated in float64: casting each trade's value to
# int64 *before* summing would drop its fractional dollars on every tick and
# let the total drift below the true traded value. `bar` performs the single
# truncation to an int64 label after the division.
cumulative_dollars = np.cumsum((df['sizes'] * df['prices']).to_numpy(dtype=np.float64))
dollar_bar_ids = bar(cumulative_dollars, 1000000)
dollar_bar_df = df.groupby(dollar_bar_ids).agg({'prices': 'ohlc', 'sizes': 'sum'})
# Keep only the inner column level (open/high/low/close/sizes).
dollar_bar_df.columns = dollar_bar_df.columns.get_level_values(1)
dollar_bar_df.head()

Unnamed: 0,open,high,low,close,sizes
0,54.673465,55.201476,51.810608,51.810608,16130
1000000,54.713263,56.450564,51.730745,54.515736,18347
2000000,53.992908,54.565729,51.59027,54.005389,18642
3000000,56.727351,57.061963,53.760381,57.061963,19026
4000000,57.512744,57.512744,52.475725,54.605392,18152
