In [1]:
import lightningchart as lc
import random

lc.set_license('P001-ZgAiXcls82XucLjGo1pNDBmgknE/FgAGIi2wGJsKgHy4pMYuLCxrxf4UXnNH-MEQCIE7WrKAMcHSuauBhMtQTwX3JPNSQPhQvdGJFy5GoAArCAiBsiHovJHY6pyh59fLdvj+QT5ld5MbewJXf7NsivQvsUw==')

# Ignoring warnings
import warnings
warnings.simplefilter('ignore')

# Importing necessary modules
import sys
import pandas as pd
import numpy as np

import sklearn
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.svm import SVR
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, train_test_split
from scipy.stats import randint

import xgboost
from xgboost import XGBRegressor, DMatrix


In [2]:
# Load the CSV file
eu = pd.read_csv('./eu.csv', parse_dates=True, skipinitialspace=True)

# Strip spaces from column names
eu.columns = eu.columns.str.strip()

# Now, you can safely drop the 'date' column if it's not needed
eu.drop('date', axis='columns', inplace=True)

# Print columns to verify the correction
print(eu.columns)


Index(['open', 'high', 'low', 'close', 'volume'], dtype='object')


In [3]:
eu.isna().sum()

open      0
high      0
low       0
close     0
volume    0
dtype: int64

### Closing Price Line Chart

In [4]:
# Convert index to milliseconds for LightningChart
eu['timestamp_ms'] = eu.index.astype('int64') // 10**6

# Initialize the chart
chart = lc.ChartXY(
    theme=lc.Themes.Dark,
    title='Euro vs USD'
)

# Get the x and y values from your DataFrame
x_values = eu['timestamp_ms'].tolist()
y_values = eu['close'].tolist()

# Add the time series data to the chart
series = chart.add_line_series().append_samples(
    x_values=x_values,
    y_values=y_values
)
series.set_line_thickness(2)  # Adjust the line thickness as needed

# Customize the axes
x_axis = chart.get_default_x_axis()
x_axis.set_tick_strategy('DateTime')
x_axis.set_title('Date')
chart.get_default_y_axis().set_title('EUR/USD Exchange Rate')

# Open the chart
chart.open()


In [5]:
#removing outliers above
vol_cut_off = eu.volume.std()*3 + eu.volume.mean()
eu.volume[eu.volume > vol_cut_off] = vol_cut_off

In [6]:
#removing ouliers below
vol_cut_off =  eu.volume.mean() - eu.volume.std()*3
eu.volume[eu.volume < vol_cut_off] = vol_cut_off

In [7]:
def generate_features(df):
    df_new = pd.DataFrame()

    # Copying original features and generating new ones
    df_new['open'] = df['open']
    df_new['open_1'] = df['open'].shift(1)
    df_new['close_1'] = df['close'].shift(1)
    df_new['high_1'] = df['high'].shift(1)
    df_new['low_1'] = df['low'].shift(1)
    df_new['volume_1'] = df['volume'].shift(1)

    # Additional features based on rolling averages and standard deviations
    df_new['avg_price_5'] = df['close'].rolling(window=5).mean().shift(1)
    # Continue with other features as before...

    # Standard deviations of prices
    df_new['std_price_5'] = df['close'].rolling(window=5).std().shift(1)
    # Continue with other features as before...

    # Returns and moving averages of returns
    df_new['return_1'] = ((df['close'] - df['close'].shift(1)) / df['close'].shift(1)).shift(1)
    df_new['moving_avg_5'] = df_new['return_1'].rolling(window=5).mean()

    # Set the target
    df_new['close'] = df['close']
    df_new = df_new.dropna(axis=0)  # Drop rows with NaN values
    return df_new

# Generate features with the corrected DataFrame
data = generate_features(eu)


In [8]:
# Display the head of the DataFrame to ensure it's correct
data.head()

Unnamed: 0,open,open_1,close_1,high_1,low_1,volume_1,avg_price_5,std_price_5,return_1,moving_avg_5,close
1971-01-12,0.5371,0.5371,0.5371,0.5371,0.5371,1.0,0.53682,0.000277,0.0,7.5e-05,0.5371
1971-01-13,0.5373,0.5371,0.5371,0.5371,0.5371,1.0,0.53692,0.000268,0.0,0.000186,0.5373
1971-01-14,0.5372,0.5373,0.5373,0.5373,0.5373,1.0,0.53708,0.000179,0.000372,0.000298,0.5372
1971-01-15,0.5376,0.5372,0.5372,0.5372,0.5372,1.0,0.53716,8.9e-05,-0.000186,0.000149,0.5376
1971-01-18,0.5379,0.5376,0.5376,0.5376,0.5376,1.0,0.53726,0.000207,0.000745,0.000186,0.5379


In [9]:
data.describe()

Unnamed: 0,open,open_1,close_1,high_1,low_1,volume_1,avg_price_5,std_price_5,return_1,moving_avg_5,close
count,12109.0,12109.0,12109.0,12109.0,12109.0,12109.0,12109.0,12109.0,12109.0,12109.0,12109.0
mean,1.073095,1.073047,1.073089,1.076972,1.069197,19608.420233,1.072992,0.005757,8.1e-05,8.1e-05,1.073137
std,0.234166,0.234217,0.234213,0.236284,0.232156,34736.673334,0.234228,0.004082,0.006418,0.002891,0.234163
min,0.5367,0.5367,0.5367,0.5367,0.5367,1.0,0.53682,4.5e-05,-0.056418,-0.019757,0.5367
25%,0.8792,0.8792,0.8791,0.8819,0.8759,231.0,0.87914,0.003026,-0.003256,-0.00158,0.8791
50%,1.1108,1.1108,1.1111,1.1151,1.10708,1891.0,1.110524,0.00489,0.0,6e-05,1.11113
75%,1.2576,1.2576,1.25767,1.2639,1.2519,11866.0,1.25812,0.007515,0.003352,0.001726,1.25767
max,1.5995,1.5995,1.5997,1.6038,1.5866,132937.440396,1.59108,0.04539,0.062078,0.020409,1.5997


In [10]:
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 12109 entries, 1971-01-12 to 2019-05-09
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   open          12109 non-null  float64
 1   open_1        12109 non-null  float64
 2   close_1       12109 non-null  float64
 3   high_1        12109 non-null  float64
 4   low_1         12109 non-null  float64
 5   volume_1      12109 non-null  float64
 6   avg_price_5   12109 non-null  float64
 7   std_price_5   12109 non-null  float64
 8   return_1      12109 non-null  float64
 9   moving_avg_5  12109 non-null  float64
 10  close         12109 non-null  float64
dtypes: float64(11)
memory usage: 1.1 MB


### Volume Chart
Plot trading volume to analyze activity levels, which can give insights into market sentiment.

In [11]:
chart = lc.ChartXY(theme=lc.Themes.Dark, title='EUR/USD Trading Volume')
volume_values = data['volume_1'].tolist()  # Assuming 'volume_1' is your volume data

volume_series = chart.add_line_series()
volume_series.append_samples(x_values, volume_values)
volume_series.set_name('Volume')

# Axes setup
x_axis = chart.get_default_x_axis()
x_axis.set_tick_strategy('DateTime')
y_axis = chart.get_default_y_axis()
y_axis.set_title('Volume')

# Open the chart
chart.open()


127.0.0.1 - - [19/Jul/2024 11:40:21] "GET / HTTP/1.1" 200 -


### Volatility Chart
Using the standard deviation of prices to create a chart that shows how volatility changes over time.

In [12]:
chart = lc.ChartXY(theme=lc.Themes.Dark, title='EUR/USD Price Volatility')
volatility_values = data['std_price_5'].tolist()  # 5-day standard deviation of close prices

volatility_series = chart.add_line_series()
volatility_series.append_samples(x_values, volatility_values)
volatility_series.set_name('Volatility')

# Axes setup
x_axis = chart.get_default_x_axis()
x_axis.set_tick_strategy('DateTime')
y_axis = chart.get_default_y_axis()
y_axis.set_title('Volatility')

# Open the chart
chart.open()


127.0.0.1 - - [19/Jul/2024 11:40:20] "GET / HTTP/1.1" 200 -


### TRAIN AND TEST DATA

In [13]:
#import datetime module
import datetime

#segregate data for training
start_train = datetime.datetime(1999, 1, 1,0,0)
end_train = datetime.datetime(2017, 12, 31, 0, 0)
data_train = data.loc[start_train:end_train]
data_train.describe()

Unnamed: 0,open,open_1,close_1,high_1,low_1,volume_1,avg_price_5,std_price_5,return_1,moving_avg_5,close
count,4941.0,4941.0,4941.0,4941.0,4941.0,4941.0,4941.0,4941.0,4941.0,4941.0,4941.0
mean,1.207845,1.207844,1.207882,1.213555,1.202155,40948.114379,1.207882,0.00647,2e-05,2.1e-05,1.207885
std,0.172721,0.172721,0.172677,0.173244,0.172127,40535.398337,0.172545,0.003894,0.00632,0.002745,0.172676
min,0.8272,0.8272,0.8267,0.8326,0.8227,1.0,0.83356,0.000365,-0.026604,-0.013307,0.8267
25%,1.08676,1.08676,1.08672,1.0917,1.08095,9051.0,1.08686,0.003785,-0.00371,-0.001699,1.08672
50%,1.2326,1.2326,1.23232,1.2382,1.22604,13905.0,1.232142,0.005628,0.000106,5.7e-05,1.23232
75%,1.3362,1.3362,1.3362,1.341,1.32991,69037.0,1.335616,0.008164,0.003518,0.001813,1.3362
max,1.5995,1.5995,1.5997,1.6038,1.5866,132937.440396,1.59108,0.045328,0.034903,0.020409,1.5997


In [14]:
#segregate data for validation
start_test = datetime.datetime(2018, 1, 1, 0, 0)
end_test = datetime.datetime(2019, 6, 7, 0, 0)
data_test = data.loc[start_test:end_test]
data_test.describe()

Unnamed: 0,open,open_1,close_1,high_1,low_1,volume_1,avg_price_5,std_price_5,return_1,moving_avg_5,close
count,349.0,349.0,349.0,349.0,349.0,349.0,349.0,349.0,349.0,349.0,349.0
mean,1.168335,1.168551,1.168404,1.172679,1.164774,85860.709055,1.16879,0.004493,-0.000176,-0.000156,1.168174
std,0.03868,0.038614,0.038646,0.039068,0.038099,28427.264667,0.038265,0.002091,0.004373,0.001806,0.0387
min,1.11322,1.11322,1.11313,1.1162,1.11105,698.0,1.116498,0.00102,-0.019107,-0.004603,1.11313
25%,1.13658,1.13675,1.13657,1.14091,1.13353,64344.0,1.136546,0.002958,-0.002896,-0.001444,1.13655
50%,1.15872,1.15874,1.1593,1.16275,1.1543,79792.0,1.16104,0.004178,-7e-05,-0.000203,1.15877
75%,1.19503,1.19503,1.19503,1.20088,1.1926,107336.0,1.197898,0.005585,0.002727,0.00105,1.19474
max,1.25073,1.25073,1.25078,1.25554,1.2448,132937.440396,1.243208,0.01485,0.014013,0.005369,1.25078


In [15]:
X_train = data_train.drop('close', axis='columns')
y_train = data_train.close

X_test = data_test.drop('close', axis='columns')
y_test = data_test.close

#checking the shape of the train and test data
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((4941, 10), (349, 10), (4941,), (349,))

### SCALING THE PREDICTOR DATA

In [16]:
#initiating standard scaler
scaler = StandardScaler()

#fit the scaler in training features
scaler.fit(X_train)

#Rescale both sets using the trained scaler
X_scaled_train = scaler.transform(X_train)
X_scaled_test = scaler.transform(X_test)

### LINEAR REGRESSION (LIN)

In [17]:
lin = LinearRegression()

lin.fit(X_scaled_train, y_train)
predictions_lin = lin.predict(X_scaled_test)

print('RMSE: {0:.3f}'.format(mean_squared_error(y_test, predictions_lin)**0.5))
print('MAE: {0:.3f}'.format(mean_absolute_error(y_test, predictions_lin)))
print('R^2: {0:.3f}'.format(r2_score(y_test, predictions_lin)))

RMSE: 0.005
MAE: 0.004
R^2: 0.983


In [18]:
# Assuming 'data_test' and your predictions and truths are properly loaded
data_test['timestamp_ms'] = (data_test.index.astype('int64') // 10**6).tolist()

# Initialize the LightningChart
chart = lc.ChartXY(
    theme=lc.Themes.Dark,
    title='Euro vs USD - Prediction vs Truth'
)

# Convert your data to lists (ensure this is done before appending to the series)
x_values = data_test['timestamp_ms'].tolist()
y_values_true = y_test.tolist()  # Assuming y_test is your true values Series
y_values_pred = predictions_lin.tolist()  # Assuming predictions_lin is your predictions Series

# Adding series for actual data and predictions
series_true = chart.add_line_series()
series_true.append_samples(x_values, y_values_true)
series_true.set_line_thickness(2)
series_true.set_name('Truth')

series_pred = chart.add_line_series()
series_pred.append_samples(x_values, y_values_pred)
series_pred.set_line_thickness(2)
series_pred.set_name('Predictions')

# Customize the axes
x_axis = chart.get_default_x_axis()
x_axis.set_tick_strategy('DateTime')
x_axis.set_title('Date')

y_axis = chart.get_default_y_axis()
y_axis.set_title('Values')

# Add legend to the chart
legend = chart.add_legend()
legend.add(series_true)
legend.add(series_pred)

# Open the chart
chart.open()


### SGD REGRESSOR (SGD)

In [19]:
param_grid = {
    'penalty':['l1', 'l2', 'elasticnet'],
    "alpha": [1e-5, 3e-5, 1e-4],
    "eta0": [0.01, 0.03, 0.1],
}

sgd = SGDRegressor()
grid_search = GridSearchCV(sgd, param_grid, cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)
grid_search.fit(X_scaled_train, y_train)

print(grid_search.best_params_)

sgd_best = grid_search.best_estimator_
#print(grid_search.best_score_)

predictions_sgd = sgd_best.predict(X_scaled_test)

#evaluating the predictions
print('RMSE: {0:.3f}'.format(mean_squared_error(y_test, predictions_sgd)**0.5))
print('MAE: {0:.3f}'.format(mean_absolute_error(y_test, predictions_sgd)))
print('R^2: {0:.3f}'.format(r2_score(y_test, predictions_sgd)))

{'alpha': 0.0001, 'eta0': 0.01, 'penalty': 'elasticnet'}
RMSE: 0.005
MAE: 0.004
R^2: 0.982


In [20]:
# Assume your 'data_test' DataFrame and the 'y_test', 'predictions_sgd' are loaded properly
data_test['timestamp_ms'] = (data_test.index.astype('int64') // 10**6).tolist()

chart = lc.ChartXY(
    theme=lc.Themes.Dark,
    title='Gold price: Prediction vs Truth - SGD Regressor'
)

# Ensure conversion to list where required
x_values = data_test['timestamp_ms'].tolist()
y_test_values = np.array(y_test).tolist()  # Convert to numpy array if not already, then to list
predictions_sgd_values = np.array(predictions_sgd).tolist()  # Same as above

series_truth = chart.add_line_series()
series_truth.append_samples(x_values, y_test_values)
series_truth.set_line_thickness(2)
series_truth.set_name('Truth')

series_sgd = chart.add_line_series()
series_sgd.append_samples(x_values, predictions_sgd_values)
series_sgd.set_line_thickness(2)
series_sgd.set_name('SGD')

x_axis = chart.get_default_x_axis()
x_axis.set_tick_strategy('DateTime')
x_axis.set_title('Date')

y_axis = chart.get_default_y_axis()
y_axis.set_title('Gold Price (USD)')

legend = chart.add_legend()
legend.add(series_truth)
legend.add(series_sgd)

chart.open()

### Extreme Gradient Boosting Regressor (XGB)

In [21]:
xgb = XGBRegressor()

data_dmatrix = DMatrix(data=X_train,
                           label=y_train)

xgb_param_grid = {'learning_rate': [0.001, 0.01, 0.1, 1],
                  'n_estimators': [50, 100, 200, 300],
                  'subsample': [0.3, 0.5, 0.7, 1]}

grid_search = GridSearchCV(estimator=xgb,    
                        param_grid=xgb_param_grid,
                        scoring='neg_mean_squared_error', 
                        cv=4, 
                        verbose=1,
                       n_jobs=-1)

grid_search.fit(X_train, y_train) 

print("Best parameters found: ",grid_search.best_params_)

xgb_best = grid_search.best_estimator_

xgb_best.fit(X_train,y_train)
predictions_xgb = xgb_best.predict(X_test)

print('RMSE: {0:.3f}'.format(mean_squared_error(y_test, predictions_xgb)**0.5))
print('MAE: {0:.3f}'.format(mean_absolute_error(y_test, predictions_xgb)))
print('R^2: {0:.3f}'.format(r2_score(y_test, predictions_xgb)))

Fitting 4 folds for each of 64 candidates, totalling 256 fits
Best parameters found:  {'learning_rate': 0.1, 'n_estimators': 200, 'subsample': 1}
RMSE: 0.006
MAE: 0.005
R^2: 0.978


127.0.0.1 - - [19/Jul/2024 11:39:21] "GET / HTTP/1.1" 200 -


127.0.0.1 - - [19/Jul/2024 11:39:22] "GET / HTTP/1.1" 200 -


127.0.0.1 - - [19/Jul/2024 11:39:22] "GET / HTTP/1.1" 200 -


In [22]:
# Make sure your 'data_test', 'y_test', and 'predictions_xgb' are loaded and prepared
data_test['timestamp_ms'] = (data_test.index.astype('int64') // 10**6).tolist()

# Initialize the LightningChart
chart = lc.ChartXY(
    theme=lc.Themes.Dark,
    title='Gold price: Prediction vs Truth - XGB Regressor'
)

# Ensure all data used in plotting is converted to lists explicitly
x_values = data_test['timestamp_ms'].tolist()
y_test_values = np.array(y_test).tolist() if isinstance(y_test, (pd.Series, np.ndarray)) else list(y_test)
predictions_xgb_values = np.array(predictions_xgb).tolist() if isinstance(predictions_xgb, (pd.Series, np.ndarray)) else list(predictions_xgb)

# Debug to confirm data types
print("X Values Type:", type(x_values))
print("Y Test Values Type:", type(y_test_values))
print("Predictions XGB Values Type:", type(predictions_xgb_values))

# Create line series for actual data and predictions
series_truth = chart.add_line_series()
series_truth.append_samples(x_values, y_test_values)
series_truth.set_line_thickness(2)
series_truth.set_name('Truth')

series_xgb = chart.add_line_series()
series_xgb.append_samples(x_values, predictions_xgb_values)
series_xgb.set_line_thickness(2)
series_xgb.set_name('XGB')

# Configure the axes for dates and values
x_axis = chart.get_default_x_axis()
x_axis.set_tick_strategy('DateTime')
x_axis.set_title('Date')

y_axis = chart.get_default_y_axis()
y_axis.set_title('Gold Price (USD)')

# Add a legend to the chart
legend = chart.add_legend()
legend.add(series_truth)
legend.add(series_xgb)

# Open the chart
chart.open()

X Values Type: <class 'list'>
Y Test Values Type: <class 'list'>
Predictions XGB Values Type: <class 'list'>


127.0.0.1 - - [19/Jul/2024 11:40:19] "GET / HTTP/1.1" 200 -


### Bagging Regressor (BGR)

In [23]:
BaggingRegressor?

[1;31mInit signature:[0m
[0mBaggingRegressor[0m[1;33m([0m[1;33m
[0m    [0mestimator[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mn_estimators[0m[1;33m=[0m[1;36m10[0m[1;33m,[0m[1;33m
[0m    [1;33m*[0m[1;33m,[0m[1;33m
[0m    [0mmax_samples[0m[1;33m=[0m[1;36m1.0[0m[1;33m,[0m[1;33m
[0m    [0mmax_features[0m[1;33m=[0m[1;36m1.0[0m[1;33m,[0m[1;33m
[0m    [0mbootstrap[0m[1;33m=[0m[1;32mTrue[0m[1;33m,[0m[1;33m
[0m    [0mbootstrap_features[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0moob_score[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mwarm_start[0m[1;33m=[0m[1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mn_jobs[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mrandom_state[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mverbose[0m[1;33m=[0m[1;36m0[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m     
A Bagging regre

In [24]:
# Assume 'lin' is defined as an instance of a linear regression model
bgr = BaggingRegressor(estimator=lin, n_estimators=100, oob_score=True, n_jobs=-1)

bgr.fit(X_scaled_train, y_train)
predictions_bgr = bgr.predict(X_scaled_test)

print('OOB: {0:.3f}'.format(bgr.oob_score_))
print('RMSE: {0:.3f}'.format(mean_squared_error(y_test, predictions_bgr)**0.5))
print('MAE: {0:.3f}'.format(mean_absolute_error(y_test, predictions_bgr)))
print('R^2: {0:.3f}'.format(r2_score(y_test, predictions_bgr)))


OOB: 0.998
RMSE: 0.005
MAE: 0.004
R^2: 0.983


In [25]:
# Ensure 'data_test', 'y_test', and 'predictions_bgr' are correctly set up
data_test['timestamp_ms'] = (data_test.index.astype('int64') // 10**6).tolist()

# Initialize the LightningChart
chart = lc.ChartXY(
    theme=lc.Themes.Dark,
    title='Gold price: Prediction vs Truth - BGR'
)

# Convert dates for x-axis (timestamps in milliseconds)
x_values = data_test['timestamp_ms'].tolist()

# Ensure that y-values are lists
y_test_values = np.array(y_test).tolist() if isinstance(y_test, (pd.Series, np.ndarray)) else list(y_test)
predictions_bgr_values = np.array(predictions_bgr).tolist() if isinstance(predictions_bgr, (pd.Series, np.ndarray)) else list(predictions_bgr)

# Adding the actual test data series
series_truth = chart.add_line_series()
series_truth.append_samples(x_values, y_test_values)
series_truth.set_line_thickness(2)
series_truth.set_name('Truth')

# Adding the BGR predictions series
series_bgr = chart.add_line_series()
series_bgr.append_samples(x_values, predictions_bgr_values)
series_bgr.set_line_thickness(2)
series_bgr.set_name('BGR')

# Customize the axes
x_axis = chart.get_default_x_axis()
x_axis.set_tick_strategy('DateTime')
x_axis.set_title('Date')

y_axis = chart.get_default_y_axis()
y_axis.set_title('Gold Price (USD)')

# Add a legend to the chart
legend = chart.add_legend()
legend.add(series_truth)
legend.add(series_bgr)

# Open the chart
chart.open()

127.0.0.1 - - [19/Jul/2024 11:40:19] "GET / HTTP/1.1" 200 -


### Random Forest Regressor (RF)

In [26]:
param_grid = {
    "max_depth": [30, 50],
    "min_samples_split": [5, 10, 20],

}

rf = RandomForestRegressor(n_estimators=100)
grid_search = GridSearchCV(rf, param_grid, cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)
grid_search.fit(X_train, y_train)

print(grid_search.best_params_)
# print(grid_search.best_score_)

rf_best = grid_search.best_estimator_
predictions_rf = rf_best.predict(X_test)

print('RMSE: {0:.3f}'.format(mean_squared_error(y_test, predictions_rf)**0.5))
print('MAE: {0:.3f}'.format(mean_absolute_error(y_test, predictions_rf)))
print('R^2: {0:.3f}'.format(r2_score(y_test, predictions_rf)))

{'max_depth': 50, 'min_samples_split': 5}
RMSE: 0.006
MAE: 0.005
R^2: 0.978


In [27]:
# Assuming 'data_test', 'y_test', and 'predictions_rf' are properly prepared
data_test['timestamp_ms'] = (data_test.index.astype('int64') // 10**6).tolist()

# Initialize the LightningChart
chart = lc.ChartXY(
    theme=lc.Themes.Dark,
    title='Gold price: Prediction vs Truth - Random Forest'
)

# Convert dates for x-axis (timestamps in milliseconds)
x_values = data_test['timestamp_ms'].tolist()

# Ensure that y-values are lists
y_test_values = np.array(y_test).tolist() if isinstance(y_test, (pd.Series, np.ndarray)) else list(y_test)
predictions_rf_values = np.array(predictions_rf).tolist() if isinstance(predictions_rf, (pd.Series, np.ndarray)) else list(predictions_rf)

# Adding the actual test data series
series_truth = chart.add_line_series()
series_truth.append_samples(x_values, y_test_values)
series_truth.set_line_thickness(2)
series_truth.set_name('Truth')

# Adding the Random Forest predictions series
series_rf = chart.add_line_series()
series_rf.append_samples(x_values, predictions_rf_values)
series_rf.set_line_thickness(2)
series_rf.set_name('RF')

# Customize the axes
x_axis = chart.get_default_x_axis()
x_axis.set_tick_strategy('DateTime')
x_axis.set_title('Date')

y_axis = chart.get_default_y_axis()
y_axis.set_title('Gold Price (USD)')

# Add a legend to the chart
legend = chart.add_legend()
legend.add(series_truth)
legend.add(series_rf)

# Open the chart
chart.open()

127.0.0.1 - - [19/Jul/2024 11:40:19] "GET / HTTP/1.1" 200 -


In [28]:
# Assuming the predictions and y_test are prepared
# Calculate MAE for each model and create a series
mae_scoring = pd.Series({
    'LIN': mean_absolute_error(y_test, predictions_lin),
    'SGD': mean_absolute_error(y_test, predictions_sgd),
    'XGB': mean_absolute_error(y_test, predictions_xgb),
    'BGR': mean_absolute_error(y_test, predictions_bgr),
    'RFR': mean_absolute_error(y_test, predictions_rf)
})

# Find the model with the minimum MAE
mae_min = mae_scoring[mae_scoring == mae_scoring.min()]
print('The model with the least mean_absolute_error:\n', mae_min)

# Initialize the LightningChart BarChart
chart = lc.BarChart(vertical=True, theme=lc.Themes.Dark)

# Prepare data for the bar chart
data = [{'category': model, 'value': mae} for model, mae in mae_scoring.items()]

# Set data for the chart
chart.set_data(data)

# Disable sorting to maintain the original order of data
chart.set_sorting('disabled')

# Set the title for the chart
chart.set_title('Mean Absolute Error of Regression Models')

# Open the chart
chart.open()

The model with the least mean_absolute_error:
 BGR    0.003999
dtype: float64


127.0.0.1 - - [19/Jul/2024 11:40:16] "GET / HTTP/1.1" 200 -


### Saving, Loading and Predicting with the BGR Model

In [29]:
#saving the BGR model with sklearn:joblib
joblib.dump(bgr, 'bgr_eurusd_10062019.pkl')

['bgr_eurusd_10062019.pkl']

In [30]:
#loading the saved model
model = joblib.load('bgr_eurusd_10062019.pkl')

In [31]:
# Assuming 'X_scaled_test', 'y_test', and 'model' are predefined and model has been trained
# 'pred' contains the predictions made by the model
pred = model.predict(X_scaled_test)

# Convert index to milliseconds for LightningChart, assuming y_test.index is datetime
timestamp_ms = (y_test.index.astype('int64') // 10**6).tolist()

# Initialize the chart
chart = lc.ChartXY(
    theme=lc.Themes.Dark,
    title='Bagging Regressor Model'
)

# Adding the truth data series
series_truth = chart.add_line_series()
series_truth.append_samples(timestamp_ms, y_test.tolist())
series_truth.set_line_thickness(2)
series_truth.set_name('Truth')

# Adding the predicted data series
series_predicted = chart.add_line_series()
series_predicted.append_samples(timestamp_ms, pred.tolist())
series_predicted.set_line_thickness(2)
series_predicted.set_name('Predicted')

# Customize the axes
x_axis = chart.get_default_x_axis()
x_axis.set_tick_strategy('DateTime')
x_axis.set_title('Date')

y_axis = chart.get_default_y_axis()
y_axis.set_title('Price')

# Add a legend to the chart
legend = chart.add_legend()
legend.add(series_truth)
legend.add(series_predicted)

# Open the chart
chart.open()

127.0.0.1 - - [19/Jul/2024 11:40:16] "GET / HTTP/1.1" 200 -


In [32]:
# Assuming 'X_scaled_test', 'y_test', and 'model' are predefined and model has been trained
# 'pred' contains the predictions made by the model
pred = model.predict(X_scaled_test)

# Initialize the LightningChart
chart = lc.ScatterChart(
    theme=lc.Themes.Dark,
    title='Regression Line for EURUSD',
    point_shape='circle',
    xlabel='True Values',
    ylabel='Predicted Values',
    individual_colors=False
)

# Adding the scatter data series
series = chart.add_point_series()
series.append_samples(y_test.tolist(), pred.tolist())

# Adding the regression line
line_series = chart.add_line_series()
# Create points for the line based on the min and max of the true values
min_val = min(y_test)
max_val = max(y_test)
line_series.append_samples([min_val, max_val], [min_val, max_val])
line_series.set_line_thickness(3)
line_series.set_name('Regression fit')

# Add a legend to the chart and only include the regression fit line
legend = chart.add_legend(data=line_series).set_title('')

# Open the chart
chart.open()

127.0.0.1 - - [19/Jul/2024 11:40:16] "GET / HTTP/1.1" 200 -


### GBR Model for higher performance

In [33]:
# Setup for Gradient Boosting Regressor using RandomizedSearchCV
param_dist_gbr = {
    'n_estimators': randint(50, 150),
    'max_depth': randint(3, 5),
    'learning_rate': [0.05, 0.1],
    'subsample': [0.8, 0.95]
}

# Initialize RandomizedSearchCV
random_search_gbr = RandomizedSearchCV(
    estimator=GradientBoostingRegressor(random_state=42),
    param_distributions=param_dist_gbr,
    n_iter=10,  # Number of parameter settings sampled
    cv=3,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    verbose=1
)

# Fit the model
random_search_gbr.fit(X_scaled_train, y_train)
best_gbr = random_search_gbr.best_estimator_

# Predictions
predictions_gbr = best_gbr.predict(X_scaled_test)

# Evaluation
print('Gradient Boosting Regressor Metrics:')
print('RMSE:', mean_squared_error(y_test, predictions_gbr, squared=False))
print('MAE:', mean_absolute_error(y_test, predictions_gbr))
print('R²:', r2_score(y_test, predictions_gbr))

Fitting 3 folds for each of 10 candidates, totalling 30 fits


Gradient Boosting Regressor Metrics:
RMSE: 0.0055567249310338055
MAE: 0.004357741651547801
R²: 0.9793241004672281


In [34]:
# Assuming 'data_test' and your predictions and truths are properly loaded
data_test['timestamp_ms'] = (data_test.index.astype('int64') // 10**6).tolist()

# Initialize the LightningChart
chart = lc.ChartXY(
    theme=lc.Themes.Dark,
    title='Euro vs USD - Prediction vs Truth - Gradient Boosting Regressor'
)

# Convert your data to lists (ensure this is done before appending to the series)
x_values = data_test['timestamp_ms'].tolist()
y_values_true = y_test.tolist()  # Assuming y_test is your true values Series
y_values_pred = predictions_gbr.tolist()  # Assuming predictions_gbr is your predictions Series

# Adding series for actual data and predictions
series_true = chart.add_line_series()
series_true.append_samples(x_values, y_values_true)
series_true.set_line_thickness(2)
series_true.set_name('Actual')

series_pred = chart.add_line_series()
series_pred.append_samples(x_values, y_values_pred)
series_pred.set_line_thickness(2)
series_pred.set_name('Predicted')

# Customize the axes
x_axis = chart.get_default_x_axis()
x_axis.set_tick_strategy('DateTime')
x_axis.set_title('Date')

y_axis = chart.get_default_y_axis()
y_axis.set_title('EUR/USD Exchange Rate')

# Add legend to the chart
legend = chart.add_legend()
legend.add(series_true)
legend.add(series_pred)

# Open the chart
chart.open()

127.0.0.1 - - [19/Jul/2024 11:40:04] "GET / HTTP/1.1" 200 -
