In [9]:
#Time series prediction with tuned XGBoost

In [10]:
import pandas as pd
from sklearn.metrics import r2_score
import plotly.graph_objects as go
import numpy as np
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split, RandomizedSearchCV

In [11]:
def loaddata():
    df = pd.read_csv("C:/Temp/EURUSD1646217573.035035.csv",
                                header = 0,
                                parse_dates= True,
                                infer_datetime_format=True)
    df['timestamp']= pd.to_numeric(df['timestamp'].astype('datetime64[ns]').sort_values()).sort_values
    return df

In [12]:
def series_to_supervised(df,sequence_length):
	x=[]
	y=[]
	for i in range(sequence_length , len(df)-2*sequence_length):
		x.append(np.array(df[i-sequence_length:i]))
		y.append(np.array(df[i:i+sequence_length]))
	x= np.concatenate(x,axis = 0)
	y = np.concatenate(y,axis = 0)
	x = x.reshape(-1,1)
	y = y.reshape(-1,1)
	return x,y

In [13]:
def xgboost_forecast(train, X_test,sequence_length):
	params = {
        'learning_rate': [0.0001,0.001,0.01, 0.1],
        'max_depth': [ 5, 10,20,50,100],
        'n_estimators' : [100, 500,1000],
        'objective': ['reg:squarederror']
    }
	X_train, y_train = series_to_supervised(train,sequence_length)
	model = XGBRegressor()
	reg = RandomizedSearchCV(estimator=model,
                             							param_distributions=params,
                             							scoring='neg_mean_squared_error',
                             							n_iter=25,
                             							n_jobs=4,
                             							verbose=10)

	reg.fit(X_train, y_train)
	model.fit(X_train,y_train)
	bestmodel= reg.best_estimator_
	yhat= model.predict(X_test)
	bestyhat = bestmodel.predict(X_test)
	return yhat,bestyhat

In [14]:
def plot(yhat,bestyhat,y_test):
	fig = go.Figure()
	fig.add_trace(go.Scatter(y=yhat, name='Predicted', mode='markers'))
	fig.add_trace(go.Scatter(y=y_test, name="True", mode='markers'))
	fig.add_trace(go.Scatter(y=bestyhat, name="Predicted after tuned", mode='markers'))
	fig.update_xaxes(title='Time')
	fig.update_yaxes(title='Price')
	fig.update_layout(title_text="Intraday price of EUR/USD")
	fig.show()

In [15]:
def main():
	df = loaddata()
	train,test = train_test_split(df['close'], test_size=0.3,train_size=0.7,random_state=0,shuffle=False)
	sequence_length = 4
	X_test,y_test = series_to_supervised(test,sequence_length)
	yhat, bestyhat = xgboost_forecast(train,X_test,sequence_length)
	y_test = y_test.reshape(-1,)
	print(f"r2 score before tuning {r2_score(yhat,y_test)}")
	print(f'r2 score after tuning{ r2_score(bestyhat,y_test)}')
	plot(yhat,bestyhat,y_test)


In [16]:
if __name__ == '__main__':
    main()

Fitting 5 folds for each of 25 candidates, totalling 125 fits
r2 score before tuning -0.4545414118863398
r2 score after tuning-0.5052915823124988
