In [1]:
import pyarrow as pa
from pyarrow import dataset as pa_ds
from pyarrow import compute as pa_compute
import pandas as pd
from itables import show
import os


from zipline_polygon_bundle.config import PolygonConfig
from zipline_polygon_bundle import generate_csv_trades_tables, ordinary_trades_mask, trades_schema, trades_to_custom_aggs

In [2]:
# Load the IPython extension named "zipline" into this kernel.
%load_ext zipline

In [3]:
from zipline.utils.run_algo import load_extensions

# Run zipline's extension loader with default=True and no additional extension
# modules; strict=True and environ=None are passed exactly as before.
# NOTE(review): presumably this is what registers the bundle/calendar that the
# cells below rely on -- confirm against the local extension file.
load_extensions(default=True, extensions=[], strict=True, environ=None)

In [4]:
# Single configuration object used by every cell below. It is built from the
# process environment, a calendar name, a date range and the agg_time setting.
config = PolygonConfig(
    calendar_name="NYSE_ALL_HOURS",
    agg_time="1min",
    start_date=pd.Timestamp("2018-01-01"),
    end_date=pd.Timestamp("2023-12-31"),
    environ=os.environ,
)

In [5]:
# Build the source of per-day trades tables; the next cell pulls items from it
# with next(), so it is consumed as an iterator.
# NOTE(review): overwrite=True presumably regenerates output that already
# exists -- confirm before re-running against a populated data directory.
trades_tables = generate_csv_trades_tables(config, overwrite=True)

In [6]:
# Take one (date, table) pair from the iterator and display it.
# Caution: next() advances trades_tables, so re-running this cell moves on to
# the following item instead of reproducing the same output.
date, trades_table = next(trades_tables)
date, trades_table

(datetime.date(2018, 1, 2),
 pyarrow.Table
 ticker: string not null
 conditions: string not null
 correction: string not null
 exchange: int8 not null
 id: string not null
 participant_timestamp: timestamp[ns, tz=UTC] not null
 price: double not null
 sequence_number: int64 not null
 sip_timestamp: timestamp[ns, tz=UTC] not null
 size: int64 not null
 tape: int8 not null
 trf_id: int64 not null
 trf_timestamp: timestamp[ns, tz=UTC] not null
 ----
 ticker: [["A","A","A","A","A",...,"AA","AA","AA","AA","AA"],["AA","AA","AA","AA","AA",...,"AA","AA","AA","AA","AA"],...,["ZVZZT","ZVZZT","ZVZZT","ZVZZT","ZVZZT",...,"ZXZZT","ZXZZT","ZXZZT","ZXZZT","ZXZZT"],["ZXZZT","ZXZZT","ZXZZT","ZXZZT","ZXZZT",...,"ZYNE","ZYNE","ZYNE","ZYNE","ZYNE"]]
 conditions: [["17,9,41","16","14,37,41","17","37",...,"","","37","","37"],["14,41","","14,41","37","",...,"14,41","","","14,41",""],...,["14,41","14,41","14,41","14,41","14,41",...,"","","","",""],["","","","","",...,"13,37","13,37","13,37","13,37","13,37"]]


In [7]:
# Show only the column names and types of the trades table (no data).
trades_table.schema

ticker: string not null
conditions: string not null
correction: string not null
exchange: int8 not null
id: string not null
participant_timestamp: timestamp[ns, tz=UTC] not null
price: double not null
sequence_number: int64 not null
sip_timestamp: timestamp[ns, tz=UTC] not null
size: int64 not null
tape: int8 not null
trf_id: int64 not null
trf_timestamp: timestamp[ns, tz=UTC] not null

In [8]:
# Keep only the rows selected by ordinary_trades_mask and hand them, together
# with the config and the date, to trades_to_custom_aggs. The filtered table is
# passed inline so no extra reference to it stays alive after the call.
ordinary_aggs = trades_to_custom_aggs(
    config, date, trades_table.filter(ordinary_trades_mask(trades_table))
)
ordinary_aggs

date=datetime.date(2018, 1, 2) pa.default_memory_pool()=<pyarrow.MemoryPool backend_name=mimalloc bytes_allocated=4733827456 max_memory=4741816192>


pyarrow.Table
ticker: string not null
window_start: timestamp[ns, tz=UTC]
open: double
high: double
low: double
close: double
traded_value: double
volume: int64
transactions: int64
vwap: double
cumulative_traded_value: double
date: date32[day]
year: uint16
month: uint8
part: string
----
ticker: [["AAPL","ABT","AG","AMD","AMZN",...,"ZDGE","ZMLP","ZOM","ZROZ","ZSL"]]
window_start: [[2018-01-02 09:00:00.000000000Z,2018-01-02 09:00:00.000000000Z,2018-01-02 09:00:00.000000000Z,2018-01-02 09:00:00.000000000Z,2018-01-02 09:00:00.000000000Z,...,2018-01-03 01:00:00.000000000Z,2018-01-03 01:00:00.000000000Z,2018-01-03 01:00:00.000000000Z,2018-01-03 01:00:00.000000000Z,2018-01-03 01:00:00.000000000Z]]
open: [[169.3,58,6.82,10.32,1172,...,2.7,16.4106,1.9,119.02,30.51]]
high: [[169.3,58,6.82,10.32,1172,...,2.7,16.4106,1.9,119.02,30.51]]
low: [[169.1,58,6.82,10.32,1172,...,2.7,16.4106,1.9,119.02,30.51]]
close: [[169.1,58,6.82,10.32,1172,...,2.7,16.4106,1.9,119.02,30.51]]
traded_value: [[40418.82,290

In [9]:
# Show only the column names and types of the aggregates table (no data).
ordinary_aggs.schema

ticker: string not null
window_start: timestamp[ns, tz=UTC]
open: double
high: double
low: double
close: double
traded_value: double
volume: int64
transactions: int64
vwap: double
cumulative_traded_value: double
date: date32[day]
year: uint16
month: uint8
part: string

In [10]:
# Display the aggregates ordered by ticker, then by window_start, as a pandas
# DataFrame. This only affects the displayed copy; ordinary_aggs is unchanged.
ordinary_aggs.sort_by([("ticker", "ascending"), ("window_start", "ascending")]).to_pandas()

Unnamed: 0,ticker,window_start,open,high,low,close,traded_value,volume,transactions,vwap,cumulative_traded_value,date,year,month,part
0,A,2018-01-02 14:30:00+00:00,67.4200,67.4800,67.2500,67.2500,8.209417e+05,12172,13,67.445099,8.209417e+05,2018-01-02,2018,1,AA
1,A,2018-01-02 14:31:00+00:00,67.4500,67.5800,67.4500,67.5600,6.481947e+04,960,16,67.520281,8.857612e+05,2018-01-02,2018,1,AA
2,A,2018-01-02 14:32:00+00:00,67.5800,67.5800,67.4900,67.5200,9.391083e+04,1391,19,67.513178,9.796720e+05,2018-01-02,2018,1,AA
3,A,2018-01-02 14:33:00+00:00,67.5200,67.6800,67.4900,67.6500,1.441668e+06,21351,86,67.522274,2.421340e+06,2018-01-02,2018,1,AA
4,A,2018-01-02 14:34:00+00:00,67.5000,67.5575,67.5000,67.5000,4.247200e+04,629,7,67.523052,2.463812e+06,2018-01-02,2018,1,AA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1483243,ZYNE,2018-01-02 20:56:00+00:00,12.9900,12.9999,12.9900,12.9999,3.976952e+03,306,4,12.996575,2.612626e+06,2018-01-02,2018,1,ZY
1483244,ZYNE,2018-01-02 20:57:00+00:00,13.0000,13.0000,12.9994,13.0000,3.990996e+03,307,4,12.999986,2.616617e+06,2018-01-02,2018,1,ZY
1483245,ZYNE,2018-01-02 20:58:00+00:00,13.0000,13.0000,12.9800,13.0000,2.320030e+04,1785,14,12.997367,2.639817e+06,2018-01-02,2018,1,ZY
1483246,ZYNE,2018-01-02 20:59:00+00:00,12.9994,13.0000,12.9700,13.0000,2.592991e+04,1995,26,12.997451,2.665747e+06,2018-01-02,2018,1,ZY


In [30]:
# Rebind ordinary_aggs to a copy sorted by (ticker, window_start). The cells
# below attach per-ticker values to rows by position, so they depend on this
# ordering being in place.
ordinary_aggs = ordinary_aggs.sort_by([("ticker", "ascending"), ("window_start", "ascending")])

In [31]:
# Collect every ticker's traded_value entries into one list per ticker.
#
# The cells below concatenate these lists and attach the result to the rows of
# ordinary_aggs purely by position, so the order of the groups and the order of
# the values inside each list must follow the table's row order. With the
# default use_threads=True pyarrow gives no guarantee of a stable output order,
# which would silently misalign the values; disable threading to get one.
traded_values_by_ticker = ordinary_aggs.group_by(
    "ticker", use_threads=False
).aggregate([("traded_value", "list")])
traded_values_by_ticker

pyarrow.Table
ticker: string not null
traded_value_list: list<item: double>
  child 0, item: double
----
ticker: [["A","AA","AAAP","AABA","AAC",...,"ZX","ZXIET","ZXZZT","ZYME","ZYNE"]]
traded_value_list: [[[820941.74,64819.46999999999,93910.83,1441668.08,42472,...,1744966.4173000003,23951695.000000004,243359.99999999997,25349.999999999996,3920.7999999999997],[109.8,54.9,1077.3999999999999,4309.599999999999,1830.9,...,5516,110.34,1104,1379,1105],...,[7699,10092.8586,4144.280000000001,531.0799999999999,990.5536,7.71,998.4,30870,951.984,2153.248,1597.79],[56730.347499999996,8197.215,63.0505,10446.764000000001,7915.725799999999,...,3976.9518,3990.9957999999997,23200.3,25929.9142,40507.415]]]

In [32]:
# Display the per-ticker lists as a pandas DataFrame (one row per ticker).
traded_values_by_ticker.to_pandas()

Unnamed: 0,ticker,traded_value_list
0,A,"[820941.74, 64819.46999999999, 93910.83, 14416..."
1,AA,"[109.8, 54.9, 1077.3999999999999, 4309.5999999..."
2,AAAP,"[408322.24999999994, 8163.0, 1958.639999999999..."
3,AABA,"[70500.0, 28270.0, 141000.0, 12372.5, 3535.0, ..."
4,AAC,"[7216.0, 2706.5, 6552.5599999999995, 4411.1, 9..."
...,...,...
8289,ZX,"[3329.61, 4.8804, 1634.5327, 37.722, 25.8, 61...."
8290,ZXIET,"[200000.0, 600000.0]"
8291,ZXZZT,"[1000.0, 1117018.5, 76998.5, 97794.0, 83237.0,..."
8292,ZYME,"[7699.0, 10092.8586, 4144.280000000001, 531.07..."


In [33]:
# Round-trip check: flatten the per-ticker lists back into a single array and
# attach it under the name "cumulative_traded_value". When the positional
# alignment is correct, this column is a plain copy of traded_value.
#
# The schema printed for ordinary_aggs earlier already contains a column with
# this name, and append_column never replaces an existing column. Drop the old
# one first so a fresh Restart & Run All does not end up with two columns
# sharing one name.
cp_base_aggs = (
    ordinary_aggs.drop(["cumulative_traded_value"])
    if "cumulative_traded_value" in ordinary_aggs.column_names
    else ordinary_aggs
)
cp_aggs = cp_base_aggs.append_column(
    "cumulative_traded_value",
    # flatten() joins all list items in order without going through Python
    # objects, giving the same values as concatenating the lists one by one.
    traded_values_by_ticker["traded_value_list"].combine_chunks().flatten(),
)
cp_aggs

pyarrow.Table
ticker: string not null
window_start: timestamp[ns, tz=UTC]
open: double
high: double
low: double
close: double
traded_value: double
volume: int64
transactions: int64
vwap: double
date: date32[day]
year: uint16
month: uint8
part: string
cumulative_traded_value: double
----
ticker: [["A","A","A","A","A",...,"ZYNE","ZYNE","ZYNE","ZYNE","ZYNE"]]
window_start: [[2018-01-02 14:30:00.000000000Z,2018-01-02 14:31:00.000000000Z,2018-01-02 14:32:00.000000000Z,2018-01-02 14:33:00.000000000Z,2018-01-02 14:34:00.000000000Z,...,2018-01-02 20:56:00.000000000Z,2018-01-02 20:57:00.000000000Z,2018-01-02 20:58:00.000000000Z,2018-01-02 20:59:00.000000000Z,2018-01-02 21:00:00.000000000Z]]
open: [[67.42,67.45,67.58,67.52,67.5,...,12.99,13,13,12.9994,13]]
high: [[67.48,67.58,67.58,67.68,67.5575,...,12.9999,13,13,13,13]]
low: [[67.25,67.45,67.49,67.49,67.5,...,12.99,12.9994,12.98,12.97,12.995]]
close: [[67.25,67.56,67.52,67.65,67.5,...,12.9999,13,13,13,12.995]]
traded_value: [[820941.74,64819.46

In [34]:
# Display the table with the copied column as a pandas DataFrame, so that
# traded_value and the appended column can be compared row by row.
cp_aggs.to_pandas()

Unnamed: 0,ticker,window_start,open,high,low,close,traded_value,volume,transactions,vwap,date,year,month,part,cumulative_traded_value
0,A,2018-01-02 14:30:00+00:00,67.4200,67.4800,67.2500,67.2500,8.209417e+05,12172,13,67.445099,2018-01-02,2018,1,AA,8.209417e+05
1,A,2018-01-02 14:31:00+00:00,67.4500,67.5800,67.4500,67.5600,6.481947e+04,960,16,67.520281,2018-01-02,2018,1,AA,6.481947e+04
2,A,2018-01-02 14:32:00+00:00,67.5800,67.5800,67.4900,67.5200,9.391083e+04,1391,19,67.513178,2018-01-02,2018,1,AA,9.391083e+04
3,A,2018-01-02 14:33:00+00:00,67.5200,67.6800,67.4900,67.6500,1.441668e+06,21351,86,67.522274,2018-01-02,2018,1,AA,1.441668e+06
4,A,2018-01-02 14:34:00+00:00,67.5000,67.5575,67.5000,67.5000,4.247200e+04,629,7,67.523052,2018-01-02,2018,1,AA,4.247200e+04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1483243,ZYNE,2018-01-02 20:56:00+00:00,12.9900,12.9999,12.9900,12.9999,3.976952e+03,306,4,12.996575,2018-01-02,2018,1,ZY,3.976952e+03
1483244,ZYNE,2018-01-02 20:57:00+00:00,13.0000,13.0000,12.9994,13.0000,3.990996e+03,307,4,12.999986,2018-01-02,2018,1,ZY,3.990996e+03
1483245,ZYNE,2018-01-02 20:58:00+00:00,13.0000,13.0000,12.9800,13.0000,2.320030e+04,1785,14,12.997367,2018-01-02,2018,1,ZY,2.320030e+04
1483246,ZYNE,2018-01-02 20:59:00+00:00,12.9994,13.0000,12.9700,13.0000,2.592991e+04,1995,26,12.997451,2018-01-02,2018,1,ZY,2.592991e+04


In [35]:
# Compute a running total of traded_value separately for each ticker and attach
# it, by position, as "cumulative_traded_value".
#
# The schema printed for ordinary_aggs earlier already contains a column with
# this name, and append_column never replaces an existing column. Drop the old
# one first so a fresh Restart & Run All does not end up with two columns
# sharing one name.
cum_base_aggs = (
    ordinary_aggs.drop(["cumulative_traded_value"])
    if "cumulative_traded_value" in ordinary_aggs.column_names
    else ordinary_aggs
)
# One cumulative_sum per ticker so the total restarts at every ticker boundary.
# ListScalar.values exposes the list's items as an Arrow array directly, which
# avoids rebuilding each list from Python objects.
cumulative_by_ticker = [
    pa_compute.cumulative_sum(values_list.values)
    for values_list in traded_values_by_ticker["traded_value_list"].combine_chunks()
]
cum_aggs = cum_base_aggs.append_column(
    "cumulative_traded_value",
    pa.concat_arrays(cumulative_by_ticker),
)
cum_aggs

pyarrow.Table
ticker: string not null
window_start: timestamp[ns, tz=UTC]
open: double
high: double
low: double
close: double
traded_value: double
volume: int64
transactions: int64
vwap: double
date: date32[day]
year: uint16
month: uint8
part: string
cumulative_traded_value: double
----
ticker: [["A","A","A","A","A",...,"ZYNE","ZYNE","ZYNE","ZYNE","ZYNE"]]
window_start: [[2018-01-02 14:30:00.000000000Z,2018-01-02 14:31:00.000000000Z,2018-01-02 14:32:00.000000000Z,2018-01-02 14:33:00.000000000Z,2018-01-02 14:34:00.000000000Z,...,2018-01-02 20:56:00.000000000Z,2018-01-02 20:57:00.000000000Z,2018-01-02 20:58:00.000000000Z,2018-01-02 20:59:00.000000000Z,2018-01-02 21:00:00.000000000Z]]
open: [[67.42,67.45,67.58,67.52,67.5,...,12.99,13,13,12.9994,13]]
high: [[67.48,67.58,67.58,67.68,67.5575,...,12.9999,13,13,13,13]]
low: [[67.25,67.45,67.49,67.49,67.5,...,12.99,12.9994,12.98,12.97,12.995]]
close: [[67.25,67.56,67.52,67.65,67.5,...,12.9999,13,13,13,12.995]]
traded_value: [[820941.74,64819.46

In [36]:
# Display the table with the per-ticker running total as a pandas DataFrame.
cum_aggs.to_pandas()

Unnamed: 0,ticker,window_start,open,high,low,close,traded_value,volume,transactions,vwap,date,year,month,part,cumulative_traded_value
0,A,2018-01-02 14:30:00+00:00,67.4200,67.4800,67.2500,67.2500,8.209417e+05,12172,13,67.445099,2018-01-02,2018,1,AA,8.209417e+05
1,A,2018-01-02 14:31:00+00:00,67.4500,67.5800,67.4500,67.5600,6.481947e+04,960,16,67.520281,2018-01-02,2018,1,AA,8.857612e+05
2,A,2018-01-02 14:32:00+00:00,67.5800,67.5800,67.4900,67.5200,9.391083e+04,1391,19,67.513178,2018-01-02,2018,1,AA,9.796720e+05
3,A,2018-01-02 14:33:00+00:00,67.5200,67.6800,67.4900,67.6500,1.441668e+06,21351,86,67.522274,2018-01-02,2018,1,AA,2.421340e+06
4,A,2018-01-02 14:34:00+00:00,67.5000,67.5575,67.5000,67.5000,4.247200e+04,629,7,67.523052,2018-01-02,2018,1,AA,2.463812e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1483243,ZYNE,2018-01-02 20:56:00+00:00,12.9900,12.9999,12.9900,12.9999,3.976952e+03,306,4,12.996575,2018-01-02,2018,1,ZY,2.612626e+06
1483244,ZYNE,2018-01-02 20:57:00+00:00,13.0000,13.0000,12.9994,13.0000,3.990996e+03,307,4,12.999986,2018-01-02,2018,1,ZY,2.616617e+06
1483245,ZYNE,2018-01-02 20:58:00+00:00,13.0000,13.0000,12.9800,13.0000,2.320030e+04,1785,14,12.997367,2018-01-02,2018,1,ZY,2.639817e+06
1483246,ZYNE,2018-01-02 20:59:00+00:00,12.9994,13.0000,12.9700,13.0000,2.592991e+04,1995,26,12.997451,2018-01-02,2018,1,ZY,2.665747e+06
