In [1]:
import pandas as pd
# pd.options.mode.chained_assignment = None
import numpy as np
import joblib
from autoIG.epics import Epics,Tickers
from autoIG.config import Source
from autoIG.utils import print_shape,ROOT_DIR
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.ensemble import StackingRegressor
from sklearn import set_config
from autoIG.modelling import (
    create_future_bid_Open,
    fillna_,
    create_past_ask_Open,
    normalise_,
    generate_target_2,
    adapt_IG_data_for_training,
    adapt_YF_data_for_training,
)
# import mlflow
# import mlflow.sklearn
# mlflow.autolog()
# mlflow.set_tracking_uri("file://"+str(ROOT_DIR.parent.absolute())+"/mlruns")
# mlflow.set_tracking_uri("http://localhost:5000")
# mlflow.set_registry_uri("sqlite://"+str(ROOT_DIR.parent.absolute())+ "/mlflow.db")
# mlflow.set_registry_uri()
# Ask scikit-learn transformers to return pandas objects from `transform`,
# so the column-based helper functions used in the pipelines below keep
# receiving DataFrames between steps.
set_config(transform_output="pandas")

In [2]:
# mlflow.tracking.get_tracking_uri()
# mlflow.tracking.get_registry_uri()
# mlflow.get_artifact_uri()

In [3]:
# Run configuration.
# The raw data is persisted so every training run sees the same dataset;
# the persisted file should be tracked with DVC.
SOURCE = Source["YF"].name
RELOAD_DATA = False
SAVE_MODEL = False

# Trading convention used throughout:
# !! BUY AT ASK
# !! SELL AT BID
model_config = {
    "NUMBER_OF_PAST_ASKS": 15,  # This is for training.
    "EPIC": Epics.BITCOIN_EPIC.name,
    "TICKER": Tickers.BITCOIN_TICKER.name,
}

# Keyword arguments forwarded to the IG historical-prices request.
historical_prices_config = {
    "resolution": "1Min",
    "numpoints": 500,
}

In [4]:
# Load the raw price history into `model_data`.
#   RELOAD_DATA=True  -> fetch from the provider named by SOURCE and cache
#                        the result as a pickle in the working directory.
#   RELOAD_DATA=False -> read the pickle written by an earlier reload.
# An unrecognised SOURCE raises instead of silently leaving `model_data`
# undefined (or stale from a previous kernel run).
if RELOAD_DATA:
    if SOURCE == "IG":
        # Imported lazily so the IG client is only required when it is used.
        from trading_ig.config import config
        from trading_ig.rest import IGService

        ig_service = IGService(config.username, config.password, config.api_key)
        ig = ig_service.create_session()
        results_ = ig_service.fetch_historical_prices_by_epic(
            model_config["EPIC"], **historical_prices_config
        )
        model_data = results_["prices"]
        model_data.to_pickle("model_data_ig.pkl")
    elif SOURCE == "YF":
        # Imported lazily so yfinance is only required when it is used.
        import yfinance as yf

        ticker = yf.Ticker(model_config["TICKER"])
        model_data = ticker.history(
            interval="1m", start="2022-12-05", end="2022-12-10"
        )
        model_data.to_pickle("model_data_yf.pkl")
    else:
        raise ValueError("Please provide source to reload data from: (IG/YF)")
else:
    # NOTE(security): unpickling can execute arbitrary code; only read
    # pickle files that were produced by this notebook.
    if SOURCE == "IG":
        model_data = pd.read_pickle("model_data_ig.pkl")
    elif SOURCE == "YF":
        model_data = pd.read_pickle("model_data_yf.pkl")
    else:
        raise ValueError(
            f"Unknown SOURCE {SOURCE!r}: no cached data to load (expected IG/YF)"
        )


In [5]:
# Convert the provider-specific frame into the training layout. In the
# recorded run this yields ASK_OPEN, BID_OPEN, BID_OPEN_S1 and r, indexed
# by UPDATED_AT.
# NOTE(review): this rebinds `model_data`, so re-running the cell without
# re-running the load cell applies the adapter a second time.
if SOURCE == 'IG':
    model_data = adapt_IG_data_for_training(model_data)
elif SOURCE == 'YF':
    model_data = adapt_YF_data_for_training(model_data)

# Print the shape, then show the first three rows as the cell output.
print_shape(model_data).head(3)

Shape: 6,873 4


Unnamed: 0_level_0,ASK_OPEN,BID_OPEN,BID_OPEN_S1,r
UPDATED_AT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-12-05 00:00:00+00:00,17128.894531,17131.894531,17125.783203,0.999818
2022-12-05 00:01:00+00:00,17122.783203,17125.783203,17121.808594,0.999943
2022-12-05 00:02:00+00:00,17118.808594,17121.808594,17117.835938,0.999943


In [6]:
# Spread summary: ask price (what we buy for) minus bid price (what we sell for).
# NOTE(review): the recorded output is a constant -3.0, i.e. ask is below
# bid in this dataset; confirm that this is what the adapter intends.
model_data["ASK_OPEN"].sub(model_data["BID_OPEN"]).describe()

count    6873.0
mean       -3.0
std         0.0
min        -3.0
25%        -3.0
50%        -3.0
75%        -3.0
max        -3.0
dtype: float64

In [8]:
from functools import partial

# Three variants of `create_past_ask_Open`, differing only in the `num`
# argument bound to each (3, 10 and 15 respectively).
(
    create_past_ask_Open_num_small,
    create_past_ask_Open_num_medium,
    create_past_ask_Open_num_large,
) = (partial(create_past_ask_Open, num=window) for window in (3, 10, 15))

In [20]:
# Retained so any cell that still refers to these names keeps working; the
# pipelines below build their own transformer instances instead of sharing
# these.
fillna_transformer = FunctionTransformer(fillna_)
normalise_transformer = FunctionTransformer(normalise_)


def _make_lookback_pipeline(add_past_columns):
    """Build one base pipeline: lag features -> fill NaNs -> normalise -> OLS.

    Parameters
    ----------
    add_past_columns : callable
        Function applied to the input frame in the first step; here one of
        the `create_past_ask_Open_num_*` partials.

    Returns
    -------
    sklearn.pipeline.Pipeline
        An unfitted pipeline with steps named "add_past_period_columns",
        "fill_na", "normalise" and "predictor".

    Notes
    -----
    Every call creates fresh transformer objects. `Pipeline` does not clone
    its steps, so reusing one transformer instance in several pipelines
    would let fitting one pipeline overwrite the fitted state seen by the
    others.
    """
    return Pipeline(
        [
            ("add_past_period_columns", FunctionTransformer(add_past_columns)),
            ("fill_na", FunctionTransformer(fillna_)),
            ("normalise", FunctionTransformer(normalise_)),
            ("predictor", LinearRegression()),
        ]
    )


# One base learner per look-back window.
pl1 = _make_lookback_pipeline(create_past_ask_Open_num_small)
pl2 = _make_lookback_pipeline(create_past_ask_Open_num_medium)
pl3 = _make_lookback_pipeline(create_past_ask_Open_num_large)

# A linear meta-learner combines the three base pipelines' predictions.
stack = StackingRegressor(
    [("small_lookback", pl1), ("medium_lookback", pl2), ("large_lookback", pl3)],
    final_estimator=LinearRegression(),
)
stack


In [10]:
# Feature matrix: only the ask-open column, kept as a one-column DataFrame.
X = model_data.loc[:, ["ASK_OPEN"]]
# Target: the `r` column. In the rows displayed earlier it appears to equal
# BID_OPEN_S1 / ASK_OPEN - TODO confirm against the adapter.
y = model_data.loc[:, "r"]

In [11]:
# Fit the stacked model on the single ASK_OPEN feature against target `r`;
# the fitted estimator is the cell's displayed output.
stack.fit(X,y)

In [12]:
# Fit each base pipeline on its own and print its prediction for the second
# row (label 1 of the default integer index), then print the stacked
# model's prediction for the same row.
for position, pipeline in enumerate((pl1, pl2, pl3)):
    pipeline.fit(X, y)
    second_row_prediction = pd.Series(pipeline.predict(X))[1]
    print(f'Pipeline {position} prediction: {second_row_prediction}' )
print(f'Stacked prediction: {stack.predict(X)[1]}')


Pipeline 0 prediction: 1.0000868119126154
Pipeline 1 prediction: 1.0000865497720413
Pipeline 2 prediction: 1.0000865181305065
Stacked prediction: 1.0000793682499196




In [13]:
# Model persistence: either write the current `stack` to model.pkl or
# replace it with the one already stored there.
# NOTE(review): with SAVE_MODEL = False the model fitted above is discarded
# in favour of whatever model.pkl holds, so later cells describe the saved
# model rather than this run's.
# NOTE(security): joblib.load unpickles the file and can run arbitrary
# code; only load model files produced by this project.
if not SAVE_MODEL:
    stack = joblib.load('model.pkl')
else:
    joblib.dump(stack,'model.pkl')

In [14]:
# Display the model currently bound to `stack` (the one loaded from
# model.pkl when SAVE_MODEL is False).
stack

In [15]:
# Input column names the model recorded at fit time; the recorded run
# shows only 'ASK_OPEN'.
stack.feature_names_in_

array(['ASK_OPEN'], dtype=object)

In [16]:
# Coefficients of the final (meta) linear regression. Three values are
# recorded; presumably one per base pipeline in the order small, medium,
# large look-back - verify against the loaded model's estimators.
stack.final_estimator_.coef_

array([0.33627437, 0.54713913, 0.2099421 ])

In [17]:

# NOTE(review): this import sits next to its only use; consider moving it
# to the import cell at the top so a fresh-kernel run shows all dependencies.
from autoIG.utils import selling_lengths_read_
# Returned a list of integers in the recorded run; what the values
# represent is not visible from this notebook.
selling_lengths_read_()

[21, 22, 23, 24, 4, 5, 6, 7, 8]

In [18]:
from autoIG.utils import read_stream_
# l,s = read_stream_(nrows = 0)
# s.resample()


In [19]:
# Summary statistics of the target `r`; in the recorded run the values are
# tightly clustered around 1.0002 (std about 0.0002).
y.describe()

count    6873.000000
mean        1.000176
std         0.000191
min         0.996834
25%         1.000112
50%         1.000177
75%         1.000241
max         1.002522
Name: r, dtype: float64