In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import warnings

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns # visualization library
import matplotlib.pyplot as plt # visualization library
from plotly.offline import init_notebook_mode, iplot # plotly offline mode
import plotly.graph_objs as go # plotly graphical object

import src.data.timeseries_eda as eda 

init_notebook_mode(connected=True)
# ignore filters
warnings.filterwarnings("ignore") # if there is a warning after some codes, this will avoid us to see them.
plt.style.use('ggplot')

In [3]:
symbol = 'XAUUSD'

In [4]:
market_data = pd.read_csv(f'data/{symbol}_4h.csv')
market_data.head()

Unnamed: 0,open_bid,low_bid,high_bid,close_bid,volume_bid,open_ask,low_ask,high_ask,close_ask,volume_ask,avg_spread,tick_number
0,1183.410034,1183.409058,1187.402954,1186.664062,0.54846,1185.050049,1184.531982,1187.847046,1187.108032,0.6448,0.511757,1476
1,1186.684082,1184.260986,1188.281982,1186.182007,2.10916,1187.119995,1184.578003,1188.621094,1186.478027,2.17188,0.325975,4428
2,1186.260986,1184.480957,1187.481079,1185.842041,3.02581,1186.56897,1184.78894,1187.761963,1186.141968,2.69611,0.299324,6195
3,1185.881104,1180.581055,1187.940918,1182.71106,7.58135,1186.141968,1180.890991,1188.302002,1182.980957,8.02604,0.277019,13557
4,1182.851074,1167.430054,1194.330933,1187.940918,16.77736,1182.980957,1167.908936,1194.708008,1188.234985,18.69664,0.277561,33036


In [5]:
from src.data.feature_engineering import PercentageFeatureAdder, BollingerBandAdder, TargetAdder, Scalar

percentage_feature_adder = PercentageFeatureAdder(1)
bidbb_adder = BollingerBandAdder(kind = 'bid')
askbb_adder = BollingerBandAdder(kind = 'ask')
target_adder = TargetAdder(step_number=1 , look_back_steps =2)

pipeline = [bidbb_adder, askbb_adder, percentage_feature_adder, target_adder]

In [6]:
transformed = market_data.copy().dropna()
for idx , trans in enumerate(pipeline):
    print(f'Transformation number: {idx + 1}')
    transformed = trans.transform(transformed)

Transformation number: 1
Transformation number: 2
Transformation number: 3
Transformation number: 4


In [7]:
features = []
for col in transformed.columns:
    features.append(col)
feature = features[4:5]+features[9:-4]+features[-2:]
target = features[-4:-2]

In [8]:
from sklearn.model_selection import train_test_split
train_features, test_features, train_target, test_target = train_test_split(
    transformed[feature].dropna(), transformed[target].dropna(), test_size=0.2, random_state=4
)

In [9]:
train_features = train_features.shift(-1)

In [11]:
from src.models.boosting import XGB 

# Train the model
xgb = XGB()
model = xgb.xgb_model()
model.fit(train_features, train_target)

Parameters: { "boosting_type" } are not used.



In [12]:


# Make predictions on the test set
predictions = model.predict(test_features)

In [13]:
predictions

array([[0.23596518, 0.2596026 ],
       [0.1641417 , 0.07490858],
       [0.35870737, 0.12701744],
       ...,
       [0.29240873, 0.20692572],
       [0.17060989, 0.21714103],
       [0.11740733, 0.3667165 ]], dtype=float32)

In [15]:
def xgb_quantile_eval(preds, labels, quantile=0.2):
    """
    Customized evaluational metric that equals
    to quantile regression loss (also known as
    pinball loss).
    Quantile regression is regression that
    estimates a specified quantile of target's
    distribution conditional on given features.
    @type preds: numpy.ndarray
    @type labels: numpy.ndarray
    @type quantile: float
    @rtype: float
    """
    return np.nanmean(
        (preds >= labels) * (1 - quantile) * (preds - labels) + (preds < labels) * quantile * (labels - preds)
    )

loss = xgb_quantile_eval(predictions,test_target,0.2)
print(loss)

0.21517598666415572
