In [1]:
from datetime import datetime, date, timedelta, time
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots


from quantrion import settings
from quantrion.asset.alpaca import AlpacaUSStock, AlpacaUSStockListProvider

load_dotenv()
%load_ext autoreload
%autoreload 2

In [2]:
asset = AlpacaUSStock("AAPL")

In [27]:
start = asset.dt.now() - timedelta(days=1)
df = await asset.bars.get(start, freq="3min")
b_sma = await asset.bars.get_sma(start, freq="3min", n=100)
lower, sma, upper = await asset.bars.get_bollinger_bands(start, freq="3min")

In [28]:
# 161.202512
# b_sma, bands
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Candlestick(
        x=df.index, open=df["open"], high=df["high"], low=df["low"], close=df["close"]
    ),
)
fig.add_trace(
    go.Scatter(x=sma.index, y=sma),
)
fig.add_trace(
    go.Scatter(x=lower.index, y=lower),
)
fig.add_trace(
    go.Scatter(x=upper.index, y=upper),
)
fig.add_trace(
    go.Scatter(
        x=b_sma.index,
        y=b_sma,
    ),
)
fig.update_xaxes(
    rangeslider_visible=True,
    rangebreaks=[
        dict(bounds=["sat", "mon"]),  # hide weekends, eg. hide sat to before mon
        dict(bounds=[16, 9.5], pattern="hour"),  # hide hours outside of 9.30am-4pm
    ],
)
fig.show()

In [37]:
_bars_resample_funcs = {
    "open": "first",
    "high": "max",
    "low": "min",
    "close": "last",
    "volume": "sum",
    "price": "sum",
    "n_trades": "sum",
}
bars = pd.read_csv("./files/apple_historical.csv")
bars["start"] = pd.DatetimeIndex(pd.to_datetime(bars["start"], utc=True)).tz_convert(
    "America/New_York"
)
bars = bars.set_index("start")
bars = bars[bars.index.dayofweek <= 4]
bars = bars[(bars.index.time <= time(16, 0)) & (bars.index.time >= time(9, 30))]
bars["price"] *= bars["volume"]
bars = bars.resample("3min").aggregate(_bars_resample_funcs).dropna()
bars["price"] /= bars["volume"]
_bars_fill_values = {
    "volume": 0,
}
bars = bars.fillna(_bars_fill_values)
bars = bars.dropna()
bars.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 33007 entries, 2021-08-26 14:06:00-04:00 to 2022-08-26 13:51:00-04:00
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   open      33007 non-null  float64
 1   high      33007 non-null  float64
 2   low       33007 non-null  float64
 3   close     33007 non-null  float64
 4   volume    33007 non-null  int64  
 5   price     33007 non-null  float64
 6   n_trades  33007 non-null  int64  
dtypes: float64(5), int64(2)
memory usage: 2.0 MB


In [38]:
spread = 0.012
tz = "America/New_York"
split_dt = pd.Timestamp("2022-06-01").tz_localize(tz)
bars_train = bars[:split_dt]
bars_test = bars[split_dt:]


def plot_candles(df):
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(
        go.Candlestick(
            x=df.index,
            open=df["open"],
            high=df["high"],
            low=df["low"],
            close=df["close"],
        ),
    )
    fig.add_trace(
        go.Scatter(x=df.index, y=df["SMA"]),
    )
    fig.add_trace(
        go.Scatter(x=df.index, y=df["LB"]),
    )
    fig.add_trace(
        go.Scatter(x=df.index, y=df["UB"]),
    )
    fig.add_trace(
        go.Scatter(
            x=df.index,
            y=df["positions"],
        ),
        secondary_y=True,
    )
    fig.add_trace(
        go.Scatter(
            x=df.index,
            y=df["B_SMA"],
        ),
    )
    fig.update_xaxes(
        rangeslider_visible=True,
        rangebreaks=[
            dict(bounds=["sat", "mon"]),  # hide weekends, eg. hide sat to before mon
            dict(bounds=[16, 9.5], pattern="hour"),  # hide hours outside of 9.30am-4pm
        ],
    )
    fig.update_yaxes(secondary_y=True)
    fig.show()


def plot_strategy(df):
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_traces(
        [
            go.Scatter(x=df.index, y=df["strategy"], name="strategy"),
            go.Scatter(x=df.index, y=df["benchmark"], name="benchmark"),
            go.Scatter(x=df.index, y=df["trades"], name="trades"),
        ],
        secondary_ys=[False, False, True],
    )
    fig.update_xaxes(
        rangeslider_visible=True,
        rangebreaks=[
            dict(bounds=["sat", "mon"]),  # hide weekends, eg. hide sat to before mon
            dict(bounds=[16, 9.5], pattern="hour"),  # hide hours outside of 9.30am-4pm
        ],
    )
    fig.show()


def process_bars(bars, sma_lag, big_sma_lag):
    bars = bars.copy()
    bars["SMA"] = bars["close"].rolling(sma_lag).mean()
    bars["B_SMA"] = bars["close"].rolling(big_sma_lag).mean()
    bars["STD"] = bars["close"].rolling(sma_lag).std()
    bars["LB"] = bars["SMA"] - 2 * bars["STD"]
    bars["UB"] = bars["SMA"] + 2 * bars["STD"]
    bars = bars.dropna(axis=0)
    bars["SMAdist"] = bars["B_SMA"] - bars["SMA"]
    bars.loc[
        (bars["close"] > bars["UB"]) & (bars["SMA"] < bars["B_SMA"]), "positions"
    ] = -1
    bars.loc[
        (bars["close"] < bars["LB"]) & (bars["SMA"] > bars["B_SMA"]), "positions"
    ] = 1
    bars.loc[bars["SMAdist"] * bars["SMAdist"].shift(1) < 0, "positions"] = 0

    #     taps_count = 0
    #     position = 0
    #     for index, bar in bars.iterrows():
    #         tap = bar["taps"]
    #         if tap == 0:
    #             continue
    #         if int(tap * taps_count) < 0:
    #             position = 0
    #             taps_count = 0
    #         taps_count += tap
    #         if tap < 0:
    #             position = 1
    #         if tap > 0:
    #             position = -1
    #         bars.loc[index, "positions"] = position
    #     bars["ctaps"] = bars.groupby(bars.index.date)["taps"].cumsum()

    #     bars.loc[bars[up_cross_stat] > bars["UB"], "positions"] = -1
    #     bars.loc[bars[low_cross_stat] < bars["LB"], "positions"] = 1
    #     bars.loc[bars.index.time >= time(15, 55), "positions"] = 0
    #     bars.loc[
    #         ((bars[low_cross_stat] <= bars["SMA"]) & (bars["positions"].ffill() == -1)) |
    #         ((bars[up_cross_stat] >= bars["SMA"]) & (bars["positions"].ffill() == 1)),
    #         "positions"
    #     ] = 0
    bars["positions"] = bars["positions"].ffill()
    bars["trades"] = bars["positions"].diff().abs().fillna(0).cumsum()
    bars["positions"] = bars["positions"].shift(1).fillna(0)
    bars["price_change"] = bars["close"].pct_change().fillna(0)
    bars["profit"] = bars["price_change"] * bars["positions"]
    bars["strategy"] = (1 + bars["profit"]).cumprod() - bars["trades"] * spread / bars[
        "price"
    ]
    bars["benchmark"] = (1 + bars["price_change"]).cumprod() - spread / bars["price"]
    return bars


import optuna


def objective(trial):
    sma_lag = trial.suggest_int("sma_lag", 10, 50)
    big_sma_lag = trial.suggest_int("big_sma_lag", sma_lag, 500)
    #     up_cross_stat = trial.suggest_categorical("up_cross_stat", ["open", "low", "high", "close", "price"])
    #     low_cross_stat = trial.suggest_categorical("low_cross_stat", ["open", "low", "high", "close", "price"])
    df = process_bars(bars_train, sma_lag, big_sma_lag)
    return (df["strategy"][-1] - df["strategy"][0]) / df["strategy"][0]


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=200)

[32m[I 2022-09-01 10:41:44,266][0m A new study created in memory with name: no-name-9f3fef2b-2579-422e-8290-7fdb5475aae4[0m
[32m[I 2022-09-01 10:41:44,284][0m Trial 0 finished with value: 0.22319626691367866 and parameters: {'sma_lag': 18, 'big_sma_lag': 428}. Best is trial 0 with value: 0.22319626691367866.[0m
[32m[I 2022-09-01 10:41:44,292][0m Trial 1 finished with value: 0.06371153380159211 and parameters: {'sma_lag': 38, 'big_sma_lag': 214}. Best is trial 0 with value: 0.22319626691367866.[0m
[32m[I 2022-09-01 10:41:44,301][0m Trial 2 finished with value: 0.09275266487816003 and parameters: {'sma_lag': 12, 'big_sma_lag': 135}. Best is trial 0 with value: 0.22319626691367866.[0m
[32m[I 2022-09-01 10:41:44,311][0m Trial 3 finished with value: 0.19419336719884028 and parameters: {'sma_lag': 24, 'big_sma_lag': 415}. Best is trial 0 with value: 0.22319626691367866.[0m
[32m[I 2022-09-01 10:41:44,323][0m Trial 4 finished with value: 0.14562957454950842 and parameters: {'s

In [39]:
study.trials_dataframe().sort_values("value", ascending=False).head(5)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_big_sma_lag,params_sma_lag,state
174,174,0.440001,2022-09-01 10:41:46.238198,2022-09-01 10:41:46.247351,0 days 00:00:00.009153,348,13,COMPLETE
75,75,0.437222,2022-09-01 10:41:45.163151,2022-09-01 10:41:45.173617,0 days 00:00:00.010466,318,13,COMPLETE
181,181,0.437222,2022-09-01 10:41:46.312646,2022-09-01 10:41:46.323748,0 days 00:00:00.011102,318,13,COMPLETE
155,155,0.435791,2022-09-01 10:41:46.033474,2022-09-01 10:41:46.043471,0 days 00:00:00.009997,350,13,COMPLETE
161,161,0.435143,2022-09-01 10:41:46.097046,2022-09-01 10:41:46.107893,0 days 00:00:00.010847,354,13,COMPLETE


In [48]:
# start = pd.Timestamp("2022-08-01T15:25:00").tz_localize(tz)
# end = pd.Timestamp("2022-08-26T16:00:00").tz_localize(tz)
start = pd.Timestamp("2022-01-01T15:25:00").tz_localize(tz)
end = pd.Timestamp("2022-01-05T16:00:00").tz_localize(tz)
# df = process_bars(bars_train, 20, 403)
df = process_bars(bars_train, 13, 348)
# df = process_bars(bars_train, 13, 353)
plot_candles(df[start:end])
plot_strategy(df)


# plo(bars_test, 403, "low", "high", True)

In [20]:
freq = "3min"
%timeit mdt.now().floor(freq) - pd.Timedelta(freq)

48.8 µs ± 1.29 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
