In [275]:
from os import environ
import pandas as pd
from pyarrow.dataset import dataset, field


def get_s3_dataset(
    source: str = 'polygon-equities/data/trades/',
) -> 'pyarrow.dataset.FileSystemDataset':
    """Open the hive-partitioned feather trades dataset on the B2 (S3-compatible) store.

    The return annotation is a string on purpose: only ``dataset`` and ``field``
    are imported from ``pyarrow.dataset``, so the bare name ``pyarrow`` is not
    bound and an unquoted annotation raises ``NameError`` on a fresh kernel.

    Args:
        source: Bucket/key prefix of the dataset root. Defaults to the
            prefix that was previously hard-coded.

    Returns:
        The dataset object built by ``pyarrow.dataset.dataset``.

    Raises:
        KeyError: If ``B2_ACCESS_KEY_ID``, ``B2_SECRET_ACCESS_KEY`` or
            ``B2_ENDPOINT_URL`` is missing from the environment.
    """
    # Imported lazily so the S3 filesystem is only needed when remote data is used.
    from pyarrow.fs import S3FileSystem

    # Credentials come from the environment; nothing secret lives in the notebook.
    s3 = S3FileSystem(
        access_key=environ['B2_ACCESS_KEY_ID'],
        secret_key=environ['B2_SECRET_ACCESS_KEY'],
        endpoint_override=environ['B2_ENDPOINT_URL'],
    )
    return dataset(
        source=source,
        format='feather',
        filesystem=s3,
        partitioning='hive',
        exclude_invalid_files=True,
    )


def get_local_dataset(
    source: str = '/Users/bobcolner/QuantClarity/pandas-polygon/data/trades/',
) -> 'pyarrow.dataset.FileSystemDataset':
    """Open the hive-partitioned feather trades dataset from the local filesystem.

    The return annotation is a string on purpose: only ``dataset`` and ``field``
    are imported from ``pyarrow.dataset``, so the bare name ``pyarrow`` is not
    bound and an unquoted annotation raises ``NameError`` on a fresh kernel.

    Args:
        source: Root directory of the dataset. Defaults to the path that was
            previously hard-coded, so existing calls keep working; pass another
            path to run the notebook on a different machine.

    Returns:
        The dataset object built by ``pyarrow.dataset.dataset``.
    """
    return dataset(
        source=source,
        format='feather',
        partitioning='hive',
        exclude_invalid_files=True,
    )


def filter_dataset(ds: 'pyarrow.dataset.FileSystemDataset',
                   symbol: str, start_date: str, end_date: str) -> pd.DataFrame:
    """Load the rows for one symbol and an inclusive date range as a DataFrame.

    The ``ds`` annotation is a string on purpose: the bare name ``pyarrow`` is
    not imported in this notebook, so an unquoted annotation raises
    ``NameError`` when the function is defined on a fresh kernel.

    Args:
        ds: Dataset exposing ``date`` and ``symbol`` fields.
        symbol: Value the ``symbol`` field must equal.
        start_date: Lower bound for the ``date`` field (inclusive).
        end_date: Upper bound for the ``date`` field (inclusive).

    Returns:
        The matching rows, converted with ``Table.to_pandas()``.
    """
    # The filter is handed to to_table() rather than applied to the DataFrame afterwards.
    filter_exp = (
        (field('date') >= start_date)
        & (field('date') <= end_date)
        & (field('symbol') == symbol)
    )
    return ds.to_table(filter=filter_exp).to_pandas()

In [276]:
# Open the local copy of the trades dataset (built by get_local_dataset above).
ds = get_local_dataset()

In [277]:
# Pull DUST rows whose `date` field lies in [2020-01-01, 2020-03-01] into a pandas DataFrame.
df = filter_dataset(ds, symbol='DUST', start_date='2020-01-01', end_date='2020-03-01')

In [278]:
# Display the filtered frame (the last expression of a cell is rendered as its output).
df

Unnamed: 0,date_time,price,volume,symbol,date
0,2020-01-02 09:03:25.843223430,5.62,123,DUST,2020-01-02
1,2020-01-02 09:19:38.947817302,5.61,100,DUST,2020-01-02
2,2020-01-02 10:11:19.695879061,5.61,376,DUST,2020-01-02
3,2020-01-02 10:15:39.255461602,5.61,624,DUST,2020-01-02
4,2020-01-02 10:17:59.228102560,5.65,1,DUST,2020-01-02
...,...,...,...,...,...
688172,2020-02-29 00:50:05.462964368,6.93,100,DUST,2020-02-28
688173,2020-02-29 00:50:06.191211900,6.93,878,DUST,2020-02-28
688174,2020-02-29 00:50:23.430102495,6.94,946,DUST,2020-02-28
688175,2020-02-29 00:52:22.424695522,6.94,300,DUST,2020-02-28


In [195]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import pandas_bokeh
# import finplot as fplt

import polygon_rest_api as papi
import polygon_backfill as pb
# import ray_backfill as rb
import polygon_s3 as ps3
import bar_samples as bs
import bar_labels as bl

# import filters as ft
# import market_cluster as mc

# Notebook-wide matplotlib defaults: figure size and style sheet.
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = [20, 7]
from matplotlib import style
# Apply matplotlib's 'fivethirtyeight' style sheet to every subsequent plot.
style.use('fivethirtyeight')

# import ipdb
# ipdb.set_trace(context=10)

# df.style.background_gradient(cmap='coolwarm', axis=None, vmin=-1, vmax=1)  # 'Blues', 'coolwarm', 'RdBu_r' & 'BrBG' are other good diverging colormaps
# pd.set_option('precision', 2)

# get local market daily data
# mdf = pd.read_parquet('data/mdf.parquet')

# .plot_bokeh(sizing_mode="scale_height")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [205]:
# Run parameters shared by the cells below.

# Instrument selection.
symbol = 'GLD'
# symbol = 'market'
symbols = ['SPY', 'QQQ']

# Tick type passed to the polygon_s3 helpers.
tick_type = 'trades'
# tick_type = None

# Single day and overall date range, as ISO date strings.
date = '2020-01-02'
start_date, end_date = '2020-01-01', '2020-10-20'

# Local directory passed to ps3.load_ticks.
result_path = '/Users/bobcolner/QuantClarity/pandas-polygon/data'

In [196]:
# Ask the polygon_s3 helper for the dates associated with GLD for the configured tick_type.
# NOTE(review): the symbol is hard-coded here instead of using the `symbol` parameter — confirm intent.
ps3.get_symbol_dates(symbol='GLD', tick_type=tick_type)

['2020-01-02',
 '2020-01-03',
 '2020-01-06',
 '2020-01-07',
 '2020-01-08',
 '2020-01-09',
 '2020-01-10',
 '2020-01-13',
 '2020-01-14',
 '2020-01-15',
 '2020-01-16',
 '2020-01-17',
 '2020-01-21',
 '2020-01-22',
 '2020-01-23',
 '2020-01-24',
 '2020-01-27',
 '2020-01-28',
 '2020-01-29',
 '2020-01-30',
 '2020-01-31',
 '2020-02-03',
 '2020-02-04',
 '2020-02-05',
 '2020-02-06',
 '2020-02-07',
 '2020-02-10',
 '2020-02-11',
 '2020-02-12',
 '2020-02-13',
 '2020-02-14',
 '2020-02-18',
 '2020-02-19',
 '2020-02-20',
 '2020-02-21',
 '2020-02-24',
 '2020-02-25',
 '2020-02-26',
 '2020-02-27',
 '2020-02-28',
 '2020-03-02',
 '2020-03-03',
 '2020-03-04',
 '2020-03-05',
 '2020-03-06',
 '2020-03-09',
 '2020-03-10',
 '2020-03-11',
 '2020-03-12',
 '2020-03-13',
 '2020-03-16',
 '2020-03-17',
 '2020-03-18',
 '2020-03-19',
 '2020-03-20',
 '2020-03-23',
 '2020-03-24',
 '2020-03-25',
 '2020-03-26',
 '2020-03-27',
 '2020-03-30',
 '2020-03-31',
 '2020-04-01',
 '2020-04-02',
 '2020-04-03',
 '2020-04-06',
 '2020-04-

In [206]:
# Load the ticks for `symbol` on 2020-01-02 through the polygon_s3 helper.
# NOTE(review): the date literal repeats the value of the `date` parameter — confirm which should drive this.
df1=ps3.load_ticks(result_path, symbol, date='2020-01-02')

trying to get ticks from local file...


In [207]:
# Load the ticks for `symbol` on 2020-01-03 through the polygon_s3 helper.
# NOTE(review): no visible later cell reads df1/df2; the bar-building cells use `ticks_df` instead — confirm.
df2=ps3.load_ticks(result_path, symbol, date='2020-01-03')

trying to get ticks from local file...


In [None]:
# Sample ticks into bars with bs.build_bars.
# NOTE(review): `ticks_df` is not assigned in any visible cell; it must already
# exist in the kernel for this cell to run — confirm where it comes from.
renko_size = 0.2
# renko_reversal = 2

# Thresholds handed to bs.build_bars. The key spelling 'renko_reveral_multiple'
# is kept exactly as written; presumably the helper looks it up under this
# name — TODO confirm before renaming.
thresh = dict(
    # GT thresholds
    renko_size=renko_size,
    renko_reveral_multiple=2,
    duration_sec=60 * 30,
    tick_imbalance=150,
    # LT thresholds
    min_duration_sec=15,
    min_tick_count=30,
)
# ticks_df['epoch'] = ticks_df.date_time.astype(int)

# Keep only ticks whose timestamp hour is between 13 and 22, both inclusive.
in_session = ticks_df['date_time'].dt.hour.between(13, 22)
tdf = ticks_df.loc[in_session]

bars, _ = bs.build_bars(ticks_df=tdf, thresh=thresh)
len(bars)

# One row per bar, indexed by the bar's 'close_at' value.
bars_df = pd.DataFrame(bars).set_index('close_at')
# bars_df = pd.DataFrame(bars)

In [None]:
# Price range (max - min) relative to the mean price of `df`;
# the author's proxy for HFT profit potential.
(df['price'].max() - df['price'].min()) / df['price'].mean()

In [None]:
# label bars
# Labelling parameters, passed positionally to bl.label_bars below.
label_horizon_mins = 30
risk_level = 0.2
# reward_base = 1
# Candidate reward ratios 3.0, 3.5, ..., 19.5 (np.arange excludes the stop value).
reward_ratios = np.arange(3, 20, .5)

# NOTE(review): the bars were built from the hour-filtered `tdf`, but labelling receives the
# unfiltered `ticks_df`, which is not assigned in any visible cell — confirm both are intended.
lbars = bl.label_bars(bars, ticks_df, risk_level, label_horizon_mins, reward_ratios)

lbars_df = pd.DataFrame(lbars)

# Summary statistics from bl.get_concurrent_stats; only the 'grand_avg_unq' entry is printed.
# presumably an average label-uniqueness figure — TODO confirm against bar_labels.
lbars_unq = bl.get_concurrent_stats(lbars_df)
print(lbars_unq['grand_avg_unq'])

In [None]:
# Interactive plot of the 'price_wmean' and 'label_rrr' columns of the labelled bars
# (plot_bokeh is presumably the accessor registered by the pandas_bokeh import).
lbars_df[['price_wmean', 'label_rrr']].plot_bokeh(sizing_mode="scale_height")
# 17, 31  (NOTE(review): presumably bar indices of interest; bars[31] is inspected in a later cell)

In [None]:
# Static matplotlib version of the same two columns, one subplot per column.
lbars_df[['price_wmean', 'label_rrr']].plot(subplots=True, figsize=[17,12])

In [None]:
# Inspect the label ticks for bar index 31, starting from that bar's 'close_at' value.
# NOTE(review): the trailing 30 presumably is the label horizon in minutes
# (same value as label_horizon_mins) — confirm against bl.get_label_ticks.
label_prices, price_end_at = bl.get_label_ticks(tdf, bars[31]['close_at'], 30)