In [1]:
from datetime import datetime, date, timedelta, time
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots


from quantrion import settings
from quantrion.ticker.alpaca import AlpacaTicker, AlpacaTickerListProvider
from quantrion.utils import MarketDatetime as mdt

load_dotenv()
%load_ext autoreload
%autoreload 2

In [18]:
ticker = AlpacaTicker("AAPL")

In [33]:
start = mdt.now() - timedelta(days=3)
df = await ticker.get_bars(start, freq="3min")
b_sma = await ticker.get_sma(start, freq="3min", n=200)
lower, sma, upper = await ticker.get_bollinger_bands(start, freq="3min")

2022-08-25 19:02:51.405486-04:00
2022-08-26 13:02:51.405486-04:00


In [34]:
# 161.202512
# b_sma, bands
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Candlestick(
        x=df.index, open=df["open"], high=df["high"], low=df["low"], close=df["close"]
    ),
)
fig.add_trace(
    go.Scatter(x=sma.index, y=sma),
)
fig.add_trace(
    go.Scatter(x=lower.index, y=lower),
)
fig.add_trace(
    go.Scatter(x=upper.index, y=upper),
)
fig.add_trace(
    go.Scatter(
        x=b_sma.index,
        y=b_sma,
    ),
)
fig.update_xaxes(
    rangeslider_visible=True,
    rangebreaks=[
        dict(bounds=["sat", "mon"]),  # hide weekends, eg. hide sat to before mon
        dict(bounds=[16, 9.5], pattern="hour"),  # hide hours outside of 9.30am-4pm
    ],
)
fig.show()

In [None]:
# start = mdt.now() - timedelta(days=365)
# bars = await ticker.get_bars(start)
# bars = bars[(bars.index.time <= time(16, 0)) & (bars.index.time >= time(9, 30))]
# bars.to_csv("./files/apple_historical.csv")
# bars.head()

In [62]:
_bars_resample_funcs = {
    "open": "first",
    "high": "max",
    "low": "min",
    "close": "last",
    "volume": "sum",
    "price": "sum",
    "n_trades": "sum",
}
bars = pd.read_csv("./files/apple_historical.csv")
bars["start"] = pd.DatetimeIndex(pd.to_datetime(bars["start"], utc=True)).tz_convert(
    "America/New_York"
)
bars = bars.set_index("start")
bars = bars[bars.index.dayofweek <= 4]
bars["price"] *= bars["volume"]
bars = bars.resample("3min").aggregate(_bars_resample_funcs).dropna()
bars["price"] /= bars["volume"]
_bars_fill_values = {
    "volume": 0,
}
bars = bars.fillna(_bars_fill_values)
na_index = bars["close"].isna()
bars["close"] = bars["close"].fillna(method="ffill")
for col in ["open", "high", "low"]:
    bars.loc[na_index, col] = bars["close"]
bars = bars.fillna(method="ffill")
bars.tail()

Unnamed: 0_level_0,open,high,low,close,volume,price,n_trades
start,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-08-26 13:39:00-04:00,165.285,165.295,164.81,164.98,755158,165.018717,11026
2022-08-26 13:42:00-04:00,164.94,165.05,164.89,164.99,397023,164.961646,3887
2022-08-26 13:45:00-04:00,164.99,165.0,164.9057,164.97,202507,164.955268,2784
2022-08-26 13:48:00-04:00,164.97,165.04,164.88,164.94,237215,164.963076,3093
2022-08-26 13:51:00-04:00,164.94,164.99,164.89,164.895,155248,164.941133,1454


In [64]:
spread = 0.012
tz = "America/New_York"
split_dt = pd.Timestamp("2022-06-01").tz_localize(tz)
bars_train = bars[:split_dt]
bars_test = bars[split_dt:]


def plot_candles(df):
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(
        go.Candlestick(
            x=df.index,
            open=df["open"],
            high=df["high"],
            low=df["low"],
            close=df["close"],
        ),
    )
    fig.add_trace(
        go.Scatter(x=df.index, y=df["SMA"]),
    )
    fig.add_trace(
        go.Scatter(x=df.index, y=df["LB"]),
    )
    fig.add_trace(
        go.Scatter(x=df.index, y=df["UB"]),
    )
    fig.add_trace(
        go.Scatter(
            x=df.index,
            y=df["positions"],
        ),
        secondary_y=True,
    )
    fig.add_trace(
        go.Scatter(
            x=df.index,
            y=df["B_SMA"],
        ),
    )
    fig.update_xaxes(
        rangeslider_visible=True,
        rangebreaks=[
            dict(bounds=["sat", "mon"]),  # hide weekends, eg. hide sat to before mon
            dict(bounds=[16, 9.5], pattern="hour"),  # hide hours outside of 9.30am-4pm
        ],
    )
    fig.update_yaxes(secondary_y=True)
    fig.show()


def plot_strategy(df):
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_traces(
        [
            go.Scatter(x=df.index, y=df["strategy"], name="strategy"),
            go.Scatter(x=df.index, y=df["benchmark"], name="benchmark"),
            go.Scatter(x=df.index, y=df["trades"], name="trades"),
        ],
        secondary_ys=[False, False, True],
    )
    fig.update_xaxes(
        rangeslider_visible=True,
        rangebreaks=[
            dict(bounds=["sat", "mon"]),  # hide weekends, eg. hide sat to before mon
            dict(bounds=[16, 9.5], pattern="hour"),  # hide hours outside of 9.30am-4pm
        ],
    )
    fig.show()


def process_bars(bars, sma_lag, big_sma_lag):
    bars = bars.copy()
    bars["SMA"] = bars["close"].rolling(sma_lag).mean()
    bars["B_SMA"] = bars["close"].rolling(big_sma_lag).mean()
    bars["STD"] = bars["close"].rolling(sma_lag).std()
    bars["LB"] = bars["SMA"] - 2 * bars["STD"]
    bars["UB"] = bars["SMA"] + 2 * bars["STD"]
    bars = bars.dropna(axis=0)
    bars["SMAdist"] = bars["B_SMA"] - bars["SMA"]
    bars.loc[
        (bars["close"] > bars["UB"]) & (bars["SMA"] < bars["B_SMA"]), "positions"
    ] = -1
    bars.loc[
        (bars["close"] < bars["LB"]) & (bars["SMA"] > bars["B_SMA"]), "positions"
    ] = 1
    bars.loc[bars["SMAdist"] * bars["SMAdist"].shift(1) < 0, "positions"] = 0

    #     taps_count = 0
    #     position = 0
    #     for index, bar in bars.iterrows():
    #         tap = bar["taps"]
    #         if tap == 0:
    #             continue
    #         if int(tap * taps_count) < 0:
    #             position = 0
    #             taps_count = 0
    #         taps_count += tap
    #         if tap < 0:
    #             position = 1
    #         if tap > 0:
    #             position = -1
    #         bars.loc[index, "positions"] = position
    #     bars["ctaps"] = bars.groupby(bars.index.date)["taps"].cumsum()

    #     bars.loc[bars[up_cross_stat] > bars["UB"], "positions"] = -1
    #     bars.loc[bars[low_cross_stat] < bars["LB"], "positions"] = 1
    #     bars.loc[bars.index.time >= time(15, 55), "positions"] = 0
    #     bars.loc[
    #         ((bars[low_cross_stat] <= bars["SMA"]) & (bars["positions"].ffill() == -1)) |
    #         ((bars[up_cross_stat] >= bars["SMA"]) & (bars["positions"].ffill() == 1)),
    #         "positions"
    #     ] = 0
    bars["positions"] = bars["positions"].ffill()
    bars["trades"] = bars["positions"].diff().abs().fillna(0).cumsum()
    bars["positions"] = bars["positions"].shift(1).fillna(0)
    bars["price_change"] = bars["close"].pct_change().fillna(0)
    bars["profit"] = bars["price_change"] * bars["positions"]
    bars["strategy"] = (1 + bars["profit"]).cumprod() - bars["trades"] * spread / bars[
        "price"
    ]
    bars["benchmark"] = (1 + bars["price_change"]).cumprod() - spread / bars["price"]
    return bars


import optuna


def objective(trial):
    sma_lag = trial.suggest_int("sma_lag", 10, 50)
    big_sma_lag = trial.suggest_int("big_sma_lag", sma_lag, 500)
    #     up_cross_stat = trial.suggest_categorical("up_cross_stat", ["open", "low", "high", "close", "price"])
    #     low_cross_stat = trial.suggest_categorical("low_cross_stat", ["open", "low", "high", "close", "price"])
    df = process_bars(bars_train, sma_lag, big_sma_lag)
    return (df["strategy"][-1] - df["strategy"][0]) / df["strategy"][0]


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=200)

[32m[I 2022-08-29 21:15:56,220][0m A new study created in memory with name: no-name-7190e1fd-6e36-4f1a-85ad-9a4cb1deec71[0m
[32m[I 2022-08-29 21:15:56,235][0m Trial 0 finished with value: 0.2634748614001703 and parameters: {'sma_lag': 15, 'big_sma_lag': 314}. Best is trial 0 with value: 0.2634748614001703.[0m
[32m[I 2022-08-29 21:15:56,247][0m Trial 1 finished with value: 0.08404753178137825 and parameters: {'sma_lag': 42, 'big_sma_lag': 460}. Best is trial 0 with value: 0.2634748614001703.[0m
[32m[I 2022-08-29 21:15:56,256][0m Trial 2 finished with value: 0.05334326089649721 and parameters: {'sma_lag': 30, 'big_sma_lag': 213}. Best is trial 0 with value: 0.2634748614001703.[0m
[32m[I 2022-08-29 21:15:56,266][0m Trial 3 finished with value: 0.07083969438314819 and parameters: {'sma_lag': 11, 'big_sma_lag': 126}. Best is trial 0 with value: 0.2634748614001703.[0m
[32m[I 2022-08-29 21:15:56,275][0m Trial 4 finished with value: 0.045425402852417474 and parameters: {'sma_l

[32m[I 2022-08-29 21:15:56,611][0m Trial 42 finished with value: 0.3913384350183555 and parameters: {'sma_lag': 12, 'big_sma_lag': 273}. Best is trial 32 with value: 0.4156825191160729.[0m
[32m[I 2022-08-29 21:15:56,620][0m Trial 43 finished with value: 0.33426568022844494 and parameters: {'sma_lag': 11, 'big_sma_lag': 279}. Best is trial 32 with value: 0.4156825191160729.[0m
[32m[I 2022-08-29 21:15:56,629][0m Trial 44 finished with value: 0.20800577976203516 and parameters: {'sma_lag': 17, 'big_sma_lag': 252}. Best is trial 32 with value: 0.4156825191160729.[0m
[32m[I 2022-08-29 21:15:56,638][0m Trial 45 finished with value: 0.2690156328444431 and parameters: {'sma_lag': 10, 'big_sma_lag': 313}. Best is trial 32 with value: 0.4156825191160729.[0m
[32m[I 2022-08-29 21:15:56,647][0m Trial 46 finished with value: 0.020521769904546794 and parameters: {'sma_lag': 15, 'big_sma_lag': 199}. Best is trial 32 with value: 0.4156825191160729.[0m
[32m[I 2022-08-29 21:15:56,656][0m

[32m[I 2022-08-29 21:15:56,998][0m Trial 85 finished with value: 0.1959555063105829 and parameters: {'sma_lag': 45, 'big_sma_lag': 360}. Best is trial 32 with value: 0.4156825191160729.[0m
[32m[I 2022-08-29 21:15:57,007][0m Trial 86 finished with value: 0.2741616209520039 and parameters: {'sma_lag': 17, 'big_sma_lag': 315}. Best is trial 32 with value: 0.4156825191160729.[0m
[32m[I 2022-08-29 21:15:57,018][0m Trial 87 finished with value: 0.33433767856741325 and parameters: {'sma_lag': 13, 'big_sma_lag': 244}. Best is trial 32 with value: 0.4156825191160729.[0m
[32m[I 2022-08-29 21:15:57,028][0m Trial 88 finished with value: 0.3996611496656022 and parameters: {'sma_lag': 11, 'big_sma_lag': 336}. Best is trial 32 with value: 0.4156825191160729.[0m
[32m[I 2022-08-29 21:15:57,037][0m Trial 89 finished with value: 0.36998013696384024 and parameters: {'sma_lag': 11, 'big_sma_lag': 329}. Best is trial 32 with value: 0.4156825191160729.[0m
[32m[I 2022-08-29 21:15:57,046][0m T

[32m[I 2022-08-29 21:15:57,403][0m Trial 128 finished with value: 0.3990015184766975 and parameters: {'sma_lag': 12, 'big_sma_lag': 338}. Best is trial 105 with value: 0.4309218201599585.[0m
[32m[I 2022-08-29 21:15:57,413][0m Trial 129 finished with value: -0.04573380196746046 and parameters: {'sma_lag': 12, 'big_sma_lag': 13}. Best is trial 105 with value: 0.4309218201599585.[0m
[32m[I 2022-08-29 21:15:57,423][0m Trial 130 finished with value: 0.4001135131436402 and parameters: {'sma_lag': 11, 'big_sma_lag': 345}. Best is trial 105 with value: 0.4309218201599585.[0m
[32m[I 2022-08-29 21:15:57,432][0m Trial 131 finished with value: 0.39212941663184697 and parameters: {'sma_lag': 11, 'big_sma_lag': 347}. Best is trial 105 with value: 0.4309218201599585.[0m
[32m[I 2022-08-29 21:15:57,441][0m Trial 132 finished with value: 0.29311487049758767 and parameters: {'sma_lag': 10, 'big_sma_lag': 371}. Best is trial 105 with value: 0.4309218201599585.[0m
[32m[I 2022-08-29 21:15:57

[32m[I 2022-08-29 21:15:57,790][0m Trial 170 finished with value: 0.27002412000841325 and parameters: {'sma_lag': 16, 'big_sma_lag': 354}. Best is trial 138 with value: 0.44226913036249527.[0m
[32m[I 2022-08-29 21:15:57,800][0m Trial 171 finished with value: 0.2442447187502299 and parameters: {'sma_lag': 29, 'big_sma_lag': 340}. Best is trial 138 with value: 0.44226913036249527.[0m
[32m[I 2022-08-29 21:15:57,809][0m Trial 172 finished with value: 0.4128500893565612 and parameters: {'sma_lag': 13, 'big_sma_lag': 333}. Best is trial 138 with value: 0.44226913036249527.[0m
[32m[I 2022-08-29 21:15:57,819][0m Trial 173 finished with value: 0.40376051334133645 and parameters: {'sma_lag': 12, 'big_sma_lag': 351}. Best is trial 138 with value: 0.44226913036249527.[0m
[32m[I 2022-08-29 21:15:57,828][0m Trial 174 finished with value: 0.3298721975758403 and parameters: {'sma_lag': 14, 'big_sma_lag': 334}. Best is trial 138 with value: 0.44226913036249527.[0m
[32m[I 2022-08-29 21:1

In [65]:
study.trials_dataframe().sort_values("value", ascending=False).head(5)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_big_sma_lag,params_sma_lag,state
138,138,0.442269,2022-08-29 21:15:57.487672,2022-08-29 21:15:57.496527,0 days 00:00:00.008855,353,13,COMPLETE
190,190,0.437222,2022-08-29 21:15:57.967221,2022-08-29 21:15:57.976143,0 days 00:00:00.008922,318,13,COMPLETE
142,142,0.435143,2022-08-29 21:15:57.523786,2022-08-29 21:15:57.532512,0 days 00:00:00.008726,354,13,COMPLETE
144,144,0.435143,2022-08-29 21:15:57.542142,2022-08-29 21:15:57.550856,0 days 00:00:00.008714,354,13,COMPLETE
162,162,0.435143,2022-08-29 21:15:57.706171,2022-08-29 21:15:57.715272,0 days 00:00:00.009101,354,13,COMPLETE


In [72]:
start = pd.Timestamp("2022-08-01T15:25:00").tz_localize(tz)
end = pd.Timestamp("2022-08-26T16:00:00").tz_localize(tz)
# df = process_bars(bars_train, 20, 403)
df = process_bars(bars_test, 13, 353)
# df = process_bars(bars_train, 13, 353)
plot_candles(df[start:end])
plot_strategy(df)


# plo(bars_test, 403, "low", "high", True)

In [59]:
bars.shape

(98494, 8)